"""
This file is part of Automunge which is released under GNU General Public License v3.0.
See file LICENSE or go to (anonymized) for full license details.

contact available via (anonymized)

Copyright (C) 2018, 2019, 2020, 2021 (anonymized) - All Rights Reserved

patent pending, applications (anonymized)
"""

#global imports
import numpy as np
import pandas as pd
from copy import deepcopy

#imports for process_time, postprocess_time 
import datetime as dt

#imports for process_bxcx 
from scipy import stats

#imports for process_hldy 
from pandas.tseries.holiday import USFederalHolidayCalendar

#imports for evalcategory, getNArows
from collections import Counter
import datetime as dt
from scipy.stats import shapiro
from scipy.stats import skew

#imports for predictinfill, predictpostinfill, trainFSmodel
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
# from sklearn.metrics import mean_squared_error
# from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
#stats may be used for cases where user elects RandomizedSearchCV hyperparameter tuning
# from scipy import stats

#imports for shuffleaccuracy
# from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_log_error

#imports for PCA dimensionality reduction
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import KernelPCA

#imports for automunge
import random
#import datetime as dt
import types

class AutoMunge:
  
  def __init__(self):
    pass

  def _assembletransformdict(self, binstransform, NArw_marker):
    '''
    #assembles the range of transformations to be applied based on the evaluated \
    #category of data
    #the primitives are intended as follows:
    #_greatgrandparents_: supplemental column derived from source column, only applied
    #to first generation, with downstream transforms included
    #_grandparents_: supplemental column derived from source column, only applied
    #to first generation
    #_parents_: replace source column, with downstream transforms performed
    #_siblings_: supplemental column derived from source column, \
    #with downstream transforms performed
    #_auntsuncles_: replace source column, without downstream transforms performed
    #_cousins_: supplemental column derived from source column, \
    #without downstream transforms performed
    #downstream transform primitives are:
    #_children_: becomes downstream parent
    #_niecesnephews_: treated like a downstream sibling
    #_coworkers_: becomes a downstream auntsuncles
    #_friends_: become downstream cousins    
    
    #for example, if we set 'bxcx' entry to have both 'bxcx' as parents and \
    #'nmbr' as cousin, then the output would be column_nmbr, column_bxcx_nmbr
    #(because 'bxcx' has a downstream primitive entry of 'nmbr' as well)
    
    #note a future extension will allow automunge class to run experiments
    #on different configurations of transform_dict to improve the feature selection
    '''

    transform_dict = {}

    #initialize bins based on what was passed through application of automunge(.)
    if binstransform is True:
      bint = 'bint'
    else:
      bint = None
        
    if NArw_marker is True:
      NArw = 'NArw'
    else:
      NArw = None

    #initialize transform_dict. Note in a future extension the range of categories
    #is intended to be built out
    transform_dict.update({'nmbr' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : [bint]}})
    
    transform_dict.update({'dxdt' : {'parents'       : ['dxdt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d2dt' : {'parents'       : ['d2dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dxdt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d3dt' : {'parents'       : ['d3dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d2dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d4dt' : {'parents'       : ['d4dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d3dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d5dt' : {'parents'       : ['d5dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d4dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d6dt' : {'parents'       : ['d6dt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d5dt'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dxd2' : {'parents'       : ['dxd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d2d2' : {'parents'       : ['d2d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dxd2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'d3d2' : {'parents'       : ['d3d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d2d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d4d2' : {'parents'       : ['d4d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d3d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d5d2' : {'parents'       : ['d5d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d4d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'d6d2' : {'parents'       : ['d6d2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['d5d2'], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmdx' : {'parents'       : ['nmdx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['dxdt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmd2' : {'parents'       : ['nmd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d2dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmd3' : {'parents'       : ['nmd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d3dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd4' : {'parents'       : ['nmd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d4dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd5' : {'parents'       : ['nmd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d5dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'nmd6' : {'parents'       : ['nmd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['d6dt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmdx' : {'parents'       : ['mmdx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmd2' : {'parents'       : ['mmd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmdx'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmd3' : {'parents'       : ['mmd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd2'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd4' : {'parents'       : ['mmd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd3'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd5' : {'parents'       : ['mmd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd4'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'mmd6' : {'parents'       : ['mmd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nbr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['mmd5'], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dddt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dddt', 'exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ddd2' : {'parents'       : ['ddd2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dddt'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ddd3' : {'parents'       : ['ddd3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd2'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd4' : {'parents'       : ['ddd4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd3'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd5' : {'parents'       : ['ddd5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd4'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ddd6' : {'parents'       : ['ddd6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ddd5'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dedt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dedt', 'exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ded2' : {'parents'       : ['ded2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['dedt'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ded3' : {'parents'       : ['ded3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded2'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded4' : {'parents'       : ['ded4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded3'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded5' : {'parents'       : ['ded5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded4'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ded6' : {'parents'       : ['ded6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['ded5'], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'shft' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shft'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'shf2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shf2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shf3'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'shf4' : {'parents'       : ['shf4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
  
    transform_dict.update({'shf5' : {'parents'       : ['shf5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf6' : {'parents'       : ['shf6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'shf7' : {'parents'       : ['shf4', 'shf5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'shf8' : {'parents'       : ['shf4', 'shf5', 'shf6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['retn'], \
                                     'friends'       : []}})

    transform_dict.update({'bnry' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnry'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'onht' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['onht'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'text' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'txt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw, 'splt'], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'txt3' : {'parents'       : ['txt3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})

    transform_dict.update({'smth' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['smth'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'fsmh' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['fsmh'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lngt' : {'parents'       : ['lngt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
  
    transform_dict.update({'lnlg' : {'parents'       : ['lnlg'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['log0'], \
                                     'friends'       : []}})

    transform_dict.update({'UPCS' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['UPCS'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Unht' : {'parents'       : ['Unht'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['onht'], \
                                     'friends'       : []}})
  
    transform_dict.update({'Utxt' : {'parents'       : ['Utxt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Utx2' : {'parents'       : ['Utx2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : ['splt']}})

    transform_dict.update({'Utx3' : {'parents'       : ['Utx3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['txt3'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Ucct' : {'parents'       : ['Ucct'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ucct', 'ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uord' : {'parents'       : ['Uord'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
        
    transform_dict.update({'Uor2' : {'parents'       : ['Uor2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['ord2'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uor3' : {'parents'       : ['Uor3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uor6' : {'parents'       : ['Uor6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['spl6'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'U101' : {'parents'       : ['U101'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'splt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['splt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl2' : {'parents'       : ['spl2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl5' : {'parents'       : ['spl5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'spl6' : {'parents'       : ['spl6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['splt'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['ord3']}})
    
    transform_dict.update({'spl7' : {'parents'       : ['spl7'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'spl8' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['spl8'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'spl9' : {'parents'       : ['spl9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'sp10' : {'parents'       : ['sp10'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    
    transform_dict.update({'sp11' : {'parents'       : ['sp11'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['spl5'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp12' : {'parents'       : ['sp12'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp11'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp13' : {'parents'       : ['sp13'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp10'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp14' : {'parents'       : ['sp14'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp15' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp15'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'sp16' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp16'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp17' : {'parents'       : ['sp17'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['spl5'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sp18' : {'parents'       : ['sp18'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : ['sp17'], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

    transform_dict.update({'sp19' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp19'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sp20' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sp20'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbst' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbst'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sbs4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbs4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'hash' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hash'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'hsh2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hsh2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hs10' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hs10'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uhsh' : {'parents'       : ['Uhsh'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['hash'], \
                                     'friends'       : []}})

    transform_dict.update({'Uhs2' : {'parents'       : ['Uhs2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['hsh2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'Uh10' : {'parents'       : ['Uh10'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['hs10'], \
                                     'friends'       : []}})
    
    transform_dict.update({'srch' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['srch'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'src2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'src3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'src4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['src4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'aggt' : {'parents'       : ['aggt'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'strn' : {'parents'       : ['strn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ord3'], \
                                     'friends'       : []}})

  
    transform_dict.update({'strg' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['strg'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmrc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmrc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr2' : {'parents'       : ['nmr2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr3' : {'parents'       : ['nmr3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmr4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmr4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr5' : {'parents'       : ['nmr5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr6' : {'parents'       : ['nmr6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmr7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmr8' : {'parents'       : ['nmr8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmr9' : {'parents'       : ['nmr9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmcm' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmcm'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc2' : {'parents'       : ['nmc2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc3' : {'parents'       : ['nmc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmc4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmc4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc5' : {'parents'       : ['nmc5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc6' : {'parents'       : ['nmc6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmc7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmc7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmc8' : {'parents'       : ['nmc8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmc9' : {'parents'       : ['nmc9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmEU' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmEU'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE2' : {'parents'       : ['nmE2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE3' : {'parents'       : ['nmE3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmE4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE5' : {'parents'       : ['nmE5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE6' : {'parents'       : ['nmE6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmE7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmE8' : {'parents'       : ['nmE8'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nmE9' : {'parents'       : ['nmE9'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['mnmx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors7' : {'parents'       : ['spl6', 'nmr2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors5' : {'parents'       : ['spl5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors6' : {'parents'       : ['spl6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ordl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ordl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
        
    transform_dict.update({'ord2' : {'parents'       : ['ord2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ord3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'ord5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord5'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'maxb' : {'parents'       : ['or3b'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'or3b' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['or3b'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['maxb'], \
                                     'friends'       : []}})
  
    transform_dict.update({'matx' : {'parents'       : ['or3c'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['onht'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or3c' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['or3c'], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['matx'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ma10' : {'parents'       : ['or3d'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or3d' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['or3d'], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['ma10'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ucct' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ucct'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
        
    transform_dict.update({'ord4' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ors2' : {'parents'       : ['spl2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or10' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or11' : {'parents'       : ['sp11'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'or12' : {'parents'       : ['nmr2'], \
                                     'siblings'      : ['sp11'], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or13' : {'parents'       : ['sp12'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'or14' : {'parents'       : ['nmr2'], \
                                     'siblings'      : ['sp12'], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'or15' : {'parents'       : ['or15'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sp13'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
  
    transform_dict.update({'or16' : {'parents'       : ['or16'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmr2'], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or17' : {'parents'       : ['or17'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sp14'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or18' : {'parents'       : ['or18'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmr2'], \
                                     'niecesnephews' : ['sp14'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'or19' : {'parents'       : ['or19'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp13'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or20' : {'parents'       : ['or20'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp14'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or21' : {'parents'       : ['or21'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp17'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'or22' : {'parents'       : ['or22'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmc8'], \
                                     'niecesnephews' : ['sp18'], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'or23' : {'parents'       : ['or23'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nmcm', 'sp19', 'ord3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'om10' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010', 'mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})

    transform_dict.update({'mmor' : {'parents'       : ['ord4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'1010' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'null' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['null'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NArw' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NArw'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr3'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr4'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'NAr5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['NAr5'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nbr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmbr'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'nbr3' : {'parents'       : ['nbr3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['bint']}})
    
    transform_dict.update({'MADn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['MADn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'MAD2' : {'parents'       : ['MAD2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'MAD3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['MAD3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnmx' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm2' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm3' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : ['nmbr', NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnm6'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnm7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx', 'bins'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'mxab' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mxab'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'retn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'rtbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'bsor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'rtb2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'bins'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'mean' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mea2' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mea3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mean', 'bins'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'tmzn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['tmzn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'date' : {'parents'       : ['date'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mnth', 'days', 'hour', 'mint', 'scnd'], \
                                     'friends'       : []}})
  
    transform_dict.update({'dat2' : {'parents'       : ['dat2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['bshr', 'wkdy', 'hldy'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat3' : {'parents'       : ['dat3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mnsn', 'mncs', 'dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat4' : {'parents'       : ['dat4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat5' : {'parents'       : ['dat5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mdsn', 'mdcs', 'dysn', 'dycs', 'hmss', 'hmsc'], \
                                     'friends'       : []}})
    
    transform_dict.update({'dat6' : {'parents'       : ['dat6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'friends'       : []}})
    
    transform_dict.update({'year' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'yea2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'yrsn', 'yrcs', 'mdsn', 'mdcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'yrcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['yrcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'yrsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['yrsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnth' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnth'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'mnt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn', 'mncs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn', 'mncs', 'dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnt6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn', 'mdcs', 'dysn', 'dycs', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mncs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mncs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mdsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mdcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mdcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'days' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['days'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'day2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn', 'dycs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn', 'dycs', 'hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms', 'dhmc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'day5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms', 'dhmc', 'hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dysn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dysn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dycs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dycs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dhms' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhms'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'dhmc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['dhmc'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hour' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hour'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'hrs2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn', 'hrcs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrs3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn', 'hrcs', 'misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrs4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmss', 'hmsc'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hrcs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hrcs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hmss' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmss'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hmsc' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hmsc'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mint' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mint'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'min2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn', 'mics'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'min3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn', 'mics', 'scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'min4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mssn', 'mscs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'misn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['misn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mics' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mics'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mssn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mssn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mscs' : {'parents'       : [], \
                                     'siblings': [], \
                                     'auntsuncles'   : ['mscs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'scnd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scnd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'scn2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scsn', 'sccs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'scsn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['scsn'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'sccs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sccs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxcx' : {'parents'       : ['bxcx'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc2' : {'parents'       : ['bxc2'], \
                                     'siblings'      : ['nmbr'], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc3' : {'parents'       : ['bxc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['nmbr'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bxc4' : {'parents'       : ['bxc4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2'], \
                                     'friends'       : []}})

    transform_dict.update({'bxc5' : {'parents'       : ['bxc5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mnmx'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nbr2', 'bins'], \
                                     'friends'       : []}})

    transform_dict.update({'ntgr' : {'parents'       : ['ntgr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', '1010', 'ordl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ntg2' : {'parents'       : ['ntg2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', '1010', 'ordl', 'pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'ntg3' : {'parents'       : ['ntg3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['retn', 'ordl', 'por2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['mnmx'], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwrs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwrs'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwr2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'log0' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['log0'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'log1' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['log0', 'pwr2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'logn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['logn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'lgnm' : {'parents'       : ['lgnm'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['nmbr'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sqrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sqrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'addd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['addd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'sbtr' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sbtr'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'mltp' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['mltp'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'divd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['divd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'rais' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['rais'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'absl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['absl'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt1' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt1'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bkt4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bkt4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'wkdy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['wkdy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bshr' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bshr'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'hldy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['hldy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'wkds' : {'parents'       : ['wkds'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
  
    transform_dict.update({'wkdo' : {'parents'       : ['wkdo'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mnts' : {'parents'       : ['mnts'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['text'], \
                                     'friends'       : []}})
  
    transform_dict.update({'mnto' : {'parents'       : ['mnto'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['ordl'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bins' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bins'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'bint' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bint'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bsor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bsor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'btor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['btor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwd'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwK' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwK'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'bnwM' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwM'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnwo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'bnKo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnKo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnMo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnMo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})    
    
    transform_dict.update({'bnep' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnep'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bne7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bne7'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bne9' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bne9'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bneo' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bneo'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bn7o' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bn7o'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'bn9o' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bn9o'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'tlbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['tlbn'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['pwor'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'por2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['por2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'por3' : {'parents'       : ['por3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bkb3' : {'parents'       : ['bkb3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
  
    transform_dict.update({'bkb4' : {'parents'       : ['bkb4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bsbn' : {'parents'       : ['bsbn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnwb' : {'parents'       : ['bnwb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnKb' : {'parents'       : ['bnKb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bnMb' : {'parents'       : ['bnMb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bneb' : {'parents'       : ['bneb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'bn7b' : {'parents'       : ['bn7b'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'bn9b' : {'parents'       : ['bn9b'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})
    
    transform_dict.update({'pwbn' : {'parents'       : ['pwbn'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'DPnb' : {'parents'       : ['DPn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPn3' : {'parents'       : ['DPn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPnb'], \
                                     'friends'       : []}})

    transform_dict.update({'DPmm' : {'parents'       : ['DPm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DPm2' : {'parents'       : ['DPm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPmm'], \
                                     'friends'       : []}})

    transform_dict.update({'DPrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['DPrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'DLnb' : {'parents'       : ['DLn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DLn3' : {'parents'       : ['DLn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DLnb'], \
                                     'friends'       : []}})

    transform_dict.update({'DLmm' : {'parents'       : ['DLm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DLm2' : {'parents'       : ['DLm2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DLmm'], \
                                     'friends'       : []}})

    transform_dict.update({'DLrt' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['DLrt'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'DPbn' : {'parents'       : ['DPb2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPb2' : {'parents'       : ['DPb2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPbn'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPod' : {'parents'       : ['DPo4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo4' : {'parents'       : ['DPo4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['DPod'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPoh' : {'parents'       : ['DPo5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['onht'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo5' : {'parents'       : ['DPo5'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['DPo2'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo2' : {'parents'       : ['DPo2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['onht'], \
                                     'friends'       : []}})
    
    transform_dict.update({'DP10' : {'parents'       : ['DPo6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['1010'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo6' : {'parents'       : ['DPo6'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['DPo3'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'DPo3' : {'parents'       : ['DPo3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['1010'], \
                                     'friends'       : []}})

    transform_dict.update({'qbt1' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['qbt1'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'qbt2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['qbt2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'qbt3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['qbt3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'qbt4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['qbt4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmqb' : {'parents'       : ['nmqb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['qbt1'], \
                                     'friends'       : []}})
  
    transform_dict.update({'nmq2' : {'parents'       : ['nmq2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['qbt1']}})
  
    transform_dict.update({'mmqb' : {'parents'       : ['mmqb'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['qbt3'], \
                                     'friends'       : []}})
    
    transform_dict.update({'mmq2' : {'parents'       : ['mmq2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['qbt3']}})
    
    transform_dict.update({'copy' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['copy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'excl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['excl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'exc2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'exc3' : {'parents'       : ['exc3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['bins']}})
    
    transform_dict.update({'exc4' : {'parents'       : ['exc4'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : ['pwr2']}})
    
    transform_dict.update({'exc5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc5'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'exc6' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'exc7' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc5'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'exc8' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc5'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'exc9' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc5'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'shfl' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['shfl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'nmbd' : {'parents'       : ['nmbr'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : [bint]}})

    transform_dict.update({'101d' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'ordd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ord3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'texd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'bnrd' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnry'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'datd' : {'parents'       : ['datd'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'friends'       : []}})
    
    transform_dict.update({'nuld' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['null'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbnm' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['exc2'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lbnb' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['nmbr'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lb10' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['1010'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbor' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['ordl'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lbos' : {'parents'       : ['lbos'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['strg'], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbte' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['text'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbbn' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['bnry'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lbsm' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['lbsm'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'lbfs' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['lbfs'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'lbda' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['year', 'mdsn', 'mdcs', 'hmss', 'hmsc', 'bshr', 'wkdy', 'hldy'], \
                                     'cousins'       : [], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'lgnr' : {'parents'       : ['lgnr', 'sgn3'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['lgn2'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
  
    transform_dict.update({'lgn2' : {'parents'       : ['lgn2'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['qbt5'], \
                                     'friends'       : []}})
    
    transform_dict.update({'sgn1' : {'parents'       : ['sgn1'], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : [], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : ['sgn2'], \
                                     'friends'       : []}})
    
    transform_dict.update({'qbt5' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['qbt5'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})
    
    transform_dict.update({'sgn2' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sgn2'], \
                                     'cousins'       : [NArw], \
                                     'children'      : [], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sgn3' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sgn3'], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sgn4'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    transform_dict.update({'sgn4' : {'parents'       : [], \
                                     'siblings'      : [], \
                                     'auntsuncles'   : ['sgn4'], \
                                     'cousins'       : [NArw], \
                                     'children'      : ['sgn1'], \
                                     'niecesnephews' : [], \
                                     'coworkers'     : [], \
                                     'friends'       : []}})

    return transform_dict
  
  def _assembleprocessdict(self):
    '''
    #creates a dictionary storing all of the processing functions for each
    #category. Note that the convention is that every dualprocess entry 
    #(to process both train and test set in automunge) is meant
    #to have a corresponding postprocess entry (to process the test set in 
    #postmunge). If the dualprocess/postprocess pair aren't included a 
    #singleprocess function will be used instead which processes a single column
    #at a time and is neutral to whether that set is from train or test data.
    
    #note that the functionpointer entry is currently only available for user passed processdict
    #this internal library process_dict does not accept functionpointer entries
    
    #NArowtype entries are:
    # - 'numeric' for source columns with expected numeric entries
    # - 'integer' for source column with expected integer entries
    # - 'justNaN' for source columns that may have expected entries other than numeric
    # - 'exclude' for source columns that aren't needing NArow columns derived
    # - 'positivenumeric' for source columns with expected positive numeric entries
    # - 'nonnegativenumeric' for source columns with expected non-negative numeric (zero allowed)
    # - 'nonzeronumeric' for source columns with allowed positive and negative but no zero
    # - 'parsenumeric' marks for infill strings that don't contain any numeric characters
    # - 'datetime' marks for infill cells that aren't recognized as datetime objects
    
    #MLinfilltype entries are:
    # - 'numeric' for single columns with numeric entries (such as could be signed floats)
    # - 'singlct' for single column sets with ordinal entries (nonnegative integer classification)
    # - 'integer' for single column sets with integer entries (signed integer regression)
    # - 'binary' for single column sets with boolean entries (0/1)
    # - 'multirt' for categorical multicolumn sets with boolean entries (0/1), up to one activation per row
    # - 'concurrent_act' for multicolumn sets with boolean entries as may have 
    #multiple entries in the same row
    # - 'concurrent_nmbr' for multicolumn sets with numerical entries
    # - 'exclude' for columns which will be excluded from ML infill
    # - '1010' for binary encoded columns, will be converted to onehot for ML
    # - 'boolexclude' boolean set suitable for Binary transform but excluded from infill
    # - 'ordlexclude' ordinal set excluded from infill
    # - 'totalexclude' sets excluded from all methods that inspect MLinfilltype, such as for excl category

    #at least one of sets of ('dualprocess' and 'postprocess') or ('singleprocess') needs to be specified
    #'inverseprocess' is optional and supports postmunge inversion
    #'info_retention' is optional boolean required with inversion to prioritize transforms with more information retention
    #'inplace_option' is optional boolean to signal when a transformation function accepts inplace operations
    #'labelctgy' is associated with feature importance and signals which transform is target for predictive model
    #for cases when a family tree returns multiple configurations and category is applied to a label set

    #note to self that any future updates such as additional supported entries should be carried through to functionpointer functions
    '''
    
    process_dict = {}
    
    #categories are nmbr, bnry, text, date, bxcx, bins, bint, NArw, null
    #note a future extension will allow the definition of new categories 
    #to automunge

    #dual column functions
    process_dict.update({'nmbr' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'dxdt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d2dt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d3dt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d4dt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d5dt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d6dt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'dxd2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d2d2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d3d2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d4d2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d5d2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'d6d2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmdx' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd2' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd3' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd4' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd5' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nmd6' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'mmdx' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmd6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'dddt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'ddd6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxdt, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxdt'}})
    process_dict.update({'dedt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'ded6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_dxd2, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'dxd2'}})
    process_dict.update({'shft' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shft, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'shft'}})
    process_dict.update({'shf2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shf2, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'shf2'}})
    process_dict.update({'shf3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shf3, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'shf3'}})
    process_dict.update({'shf4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shft, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'shf5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shf2, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'shf6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shf3, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'shf7' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shft, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'shf8' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shft, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_shft, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'nbr2' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nbr3' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'MADn' : {'dualprocess' : self._process_MADn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_MADn, \
                                  'inverseprocess' : self._inverseprocess_MADn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'MADn'}})
    process_dict.update({'MAD2' : {'dualprocess' : self._process_MADn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_MADn, \
                                  'inverseprocess' : self._inverseprocess_MADn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'MADn'}})
    process_dict.update({'MAD3' : {'dualprocess' : self._process_MAD3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_MAD3, \
                                  'inverseprocess' : self._inverseprocess_MAD3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'MAD3'}})
    process_dict.update({'mnmx' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mnm2' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mnm3' : {'dualprocess' : self._process_mnm3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnm3, \
                                  'inverseprocess' : self._inverseprocess_mnm3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnm3'}})
    process_dict.update({'mnm4' : {'dualprocess' : self._process_mnm3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnm3, \
                                  'inverseprocess' : self._inverseprocess_mnm3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnm3'}})
    process_dict.update({'mnm5' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mnm6' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'defaultparams' : {'floor' : True}, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mnm7' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnm7'}})
    process_dict.update({'mxab' : {'dualprocess' : self._process_mxab, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mxab, \
                                  'inverseprocess' : self._inverseprocess_mxab, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mxab'}})
    process_dict.update({'retn' : {'dualprocess' : self._process_retn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_retn, \
                                  'inverseprocess' : self._inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'rtbn' : {'dualprocess' : self._process_retn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_retn, \
                                  'inverseprocess' : self._inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'rtb2' : {'dualprocess' : self._process_retn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_retn, \
                                  'inverseprocess' : self._inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'retn'}})
    process_dict.update({'mean' : {'dualprocess' : self._process_mean, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mean, \
                                  'inverseprocess' : self._inverseprocess_mean, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mean'}})
    process_dict.update({'mea2' : {'dualprocess' : self._process_mean, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mean, \
                                  'inverseprocess' : self._inverseprocess_mean, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mean'}})
    process_dict.update({'mea3' : {'dualprocess' : self._process_mean, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mean, \
                                  'inverseprocess' : self._inverseprocess_mean, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mean'}})
    process_dict.update({'bnry' : {'dualprocess' : self._process_binary, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_binary, \
                                  'inverseprocess' : self._inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'bnry'}})
    process_dict.update({'bnr2' : {'dualprocess' : self._process_binary2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_binary2, \
                                  'inverseprocess' : self._inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'bnr2'}})
    process_dict.update({'onht' : {'dualprocess' : self._process_onht, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_onht, \
                                  'inverseprocess' : self._inverseprocess_onht, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'text' : {'dualprocess' : self._process_text, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_text, \
                                  'inverseprocess' : self._inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'txt2' : {'dualprocess' : self._process_text, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_text, \
                                  'inverseprocess' : self._inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'txt3' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'smth' : {'dualprocess' : self._process_smth, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_smth, \
                                  'inverseprocess' : self._inverseprocess_smth, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'smth'}})
    process_dict.update({'fsmh' : {'dualprocess' : self._process_smth, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_smth, \
                                  'inverseprocess' : self._inverseprocess_smth, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'LSfit' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'smth'}})
    process_dict.update({'lngt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_lngt, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'integer', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'lnlg' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_lngt, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'integer', \
                                  'labelctgy' : 'log0'}})
    process_dict.update({'UPCS' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'exclude'}})
    process_dict.update({'Unht' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'Utxt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'Utx2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'Utx3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'Ucct' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ucct'}})
    process_dict.update({'Uord' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'Uor2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'Uor3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'Uor6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'U101' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'splt' : {'dualprocess' : self._process_splt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_splt, \
                                  'inverseprocess' : self._inverseprocess_splt, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'splt'}})

    process_dict.update({'spl2' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'spl5' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'spl6' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'spl7' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl7', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True, \
                                                     'minsplit' : 1}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'spl8' : {'dualprocess' : self._process_splt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_splt, \
                                  'inverseprocess' : self._inverseprocess_splt, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl8', 'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'splt'}})
    process_dict.update({'spl9' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl9', \
                                                     'test_same_as_train' : True, \
                                                     'consolidate_nonoverlaps' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'sp10' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp10', \
                                                     'test_same_as_train' : True, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'sp11' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp12' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp13' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl9', \
                                                     'test_same_as_train' : True, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp14' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl9', \
                                                     'test_same_as_train' : True, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp15' : {'dualprocess' : self._process_splt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_splt, \
                                  'inverseprocess' : self._inverseprocess_splt, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp15', \
                                                     'concurrent_activations': True, \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'splt'}})
    process_dict.update({'sp16' : {'dualprocess' : self._process_splt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_splt, \
                                  'inverseprocess' : self._inverseprocess_splt, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp16', \
                                                     'concurrent_activations': True, \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'splt'}})
    process_dict.update({'sp17' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp18' : {'dualprocess' : self._process_spl2, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_spl2, \
                                   'inverseprocess' : self._inverseprocess_spl2, \
                                   'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'sp19' : {'dualprocess' : self._process_sp19, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sp19, \
                                  'inverseprocess' : self._inverseprocess_sp19, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sp19'}})
    process_dict.update({'sp20' : {'dualprocess' : self._process_sp19, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sp19, \
                                  'inverseprocess' : self._inverseprocess_sp19, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sp20', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sp19'}})
    process_dict.update({'sbst' : {'dualprocess' : self._process_sbst, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sbst, \
                                  'inverseprocess' : self._inverseprocess_sbst, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbst', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'sbst'}})
    process_dict.update({'sbs2' : {'dualprocess' : self._process_sbst, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sbst, \
                                  'inverseprocess' : self._inverseprocess_sbst, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs2', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'sbst'}})
    process_dict.update({'sbs3' : {'dualprocess' : self._process_sbs3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sbs3, \
                                  'inverseprocess' : self._inverseprocess_sbs3, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs3', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sbs3'}})
    process_dict.update({'sbs4' : {'dualprocess' : self._process_sbs3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_sbs3, \
                                  'inverseprocess' : self._inverseprocess_sbs3, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_sbs4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : 'sbs3'}})
    process_dict.update({'hash' : {'dualprocess' : self._process_hash, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_hash, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'ordlexclude', \
                                  'labelctgy' : 'hash'}})
    process_dict.update({'hsh2' : {'dualprocess' : self._process_hash, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_hash, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'space' : '', \
                                                     'excluded_characters' : []}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'ordlexclude', \
                                  'labelctgy' : 'hash'}})
    process_dict.update({'hs10' : {'dualprocess' : self._process_hs10, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_hs10, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'hs10'}})
    process_dict.update({'Uhsh' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'hash'}})
    process_dict.update({'Uhs2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'hash'}})
    process_dict.update({'Uh10' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_UPCS, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'hs10'}})
    process_dict.update({'srch' : {'dualprocess' : self._process_srch, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_srch, \
                                   'inverseprocess' : self._inverseprocess_srch, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'srch'}})
    process_dict.update({'src2' : {'dualprocess' : self._process_src2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_src2, \
                                   'inverseprocess' : self._inverseprocess_src2, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'src2'}})
    process_dict.update({'src3' : {'dualprocess' : self._process_src3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_src3, \
                                   'inverseprocess' : self._inverseprocess_src3, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'concurrent_act', \
                                  'labelctgy' : 'src3'}})
    process_dict.update({'src4' : {'dualprocess' : self._process_src4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_src4, \
                                   'inverseprocess' : self._inverseprocess_src4, \
                                   'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'src4'}})
    process_dict.update({'aggt' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_aggt, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'strn' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_strn, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'strg' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_strg, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_strg, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'nmrc' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmrc'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmr4' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr5' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr6' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmr7' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmr8' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmr9' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'numbers', \
                                                     'suffix' : '_nmr7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmcm' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmcm'}})
    process_dict.update({'nmc2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmcm'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmc4' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmc5' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc6' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmc7' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmc8' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmc9' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'commas', \
                                                     'suffix' : '_nmc7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmEU' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_nmrc, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmEU'}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmE4' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE5' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE6' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE4', \
                                                     'test_same_as_train' : True}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'nmE7' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmrc'}})
    process_dict.update({'nmE8' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'nmE9' : {'dualprocess' : self._process_nmr4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_nmr4, \
                                  'inverseprocess' : self._inverseprocess_nmrc, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'convention' : 'spaces', \
                                                     'suffix' : '_nmE7', \
                                                     'test_same_as_train' : False}, \
                                  'NArowtype' : 'parsenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ors7' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ors5' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ors6' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl5', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ordl' : {'dualprocess' : self._process_ordl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ordl, \
                                  'inverseprocess' : self._inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'ord2' : {'dualprocess' : self._process_ordl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ordl, \
                                  'inverseprocess' : self._inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ord3' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'ord5' : {'dualprocess' : self._process_ordl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ordl, \
                                  'inverseprocess' : self._inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'maxb' : {'dualprocess' : self._process_maxb, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_maxb, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'maxb'}})
    process_dict.update({'or3b' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'maxb'}})
    process_dict.update({'matx' : {'dualprocess' : self._process_maxb, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_maxb, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'or3c' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'ma10' : {'dualprocess' : self._process_maxb, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_maxb, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'or3d' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'ucct' : {'dualprocess' : self._process_ucct, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ucct, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'ucct'}})
    process_dict.update({'ord4' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'ors2' : {'dualprocess' : self._process_spl2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_spl2, \
                                  'inverseprocess' : self._inverseprocess_spl2, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix' : '_spl2', \
                                                     'test_same_as_train' : False, \
                                                     'consolidate_nonoverlaps' : False}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'or10' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'or11' : {'dualprocess' : self._process_1010, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_1010, \
                                   'inverseprocess' : self._inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or12' : {'dualprocess' : self._process_1010, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_1010, \
                                   'inverseprocess' : self._inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or13' : {'dualprocess' : self._process_1010, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_1010, \
                                   'inverseprocess' : self._inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or14' : {'dualprocess' : self._process_1010, \
                                   'singleprocess' : None, \
                                   'postprocess' : self._postprocess_1010, \
                                   'inverseprocess' : self._inverseprocess_1010, \
                                   'info_retention' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : '1010', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or15' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or16' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or17' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or18' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or19' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or20' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or21' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or22' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'or23' : {'dualprocess' : None, \
                                   'singleprocess' : self._process_UPCS, \
                                   'postprocess' : None, \
                                   'inverseprocess' : self._inverseprocess_UPCS, \
                                   'info_retention' : False, \
                                   'inplace_option' : True, \
                                   'NArowtype' : 'justNaN', \
                                   'MLinfilltype' : 'exclude', \
                                   'labelctgy' : 'ord3'}})
    process_dict.update({'om10' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'mmor' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'1010' : {'dualprocess' : self._process_1010, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_1010, \
                                  'inverseprocess' : self._inverseprocess_1010, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bxcx' : {'dualprocess' : self._process_bxcx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bxcx, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'tmsc' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'time' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'tmzn' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmzn'}})
    process_dict.update({'date' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'dat2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'hldy'}})
    process_dict.update({'dat3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dat6' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_tmzn, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'year' : {'dualprocess' : self._process_time, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_time, \
                                  'inverseprocess' : self._inverseprocess_year, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_year', \
                                                     'normalization' : 'zscore'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'yea2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'yrsn' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_yrsn', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'yrcs' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'year', \
                                                     'suffix' : '_yrcs', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnth' : {'dualprocess' : self._process_time, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_time, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mnth', \
                                                     'normalization' : 'zscore'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'mnt2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt5' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnt6' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mnsn' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mnsn', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mncs' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'month', \
                                                     'suffix' : '_mncs', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mdsn' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'monthday', \
                                                     'suffix' : '_mdsn', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'mdcs' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'monthday', \
                                                     'suffix' : '_mdcs', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'days' : {'dualprocess' : self._process_time, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_time, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_days', \
                                                     'normalization' : 'zscore'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'day2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'day5' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dysn' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_dysn', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dycs' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'day', \
                                                     'suffix' : '_dycs', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dhms' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'dayhourminute', \
                                                     'suffix' : '_dhms', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'dhmc' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'dayhourminute', \
                                                     'suffix' : '_dhmc', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hour' : {'dualprocess' : self._process_time, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_time, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hour', \
                                                     'normalization' : 'zscore'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'time'}})
    process_dict.update({'hrs2' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrs3' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrs4' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrsn' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hrsn', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hrcs' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'hour', \
                                                     'suffix' : '_hrcs', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hmss' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'hourminutesecond', \
                                                     'suffix' : '_hmss', \
                                                     'function' : 'sin'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    process_dict.update({'hmsc' : {'dualprocess' : self._process_tmsc, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_tmsc, \
                                  'defaultparams' : {'scale' : 'hourminutesecond', \
                                                     'suffix' : '_hmsc', \
                                                     'function' : 'cos'}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'tmsc'}})
    #minute and second scale datetime categories, registered in a single update call
    #mint and scnd point to _process_time / _postprocess_time with a 'zscore' normalization default
    #misn, mics, mssn, mscs, scsn, sccs point to _process_tmsc / _postprocess_tmsc
    #with a 'sin' or 'cos' default for the 'function' parameter
    #min2, min3, min4, scn2 register no processing functions (all three function slots are None)
    #NOTE(review): most sin/cos entries here carry labelctgy 'tmsc' but mics carries 'mics',
    #and min4 carries 'misn' while min2 / min3 carry 'tmsc' - confirm this is intended
    process_dict.update({
      'mint': {
        'dualprocess': self._process_time,
        'singleprocess': None,
        'postprocess': self._postprocess_time,
        'defaultparams': {
          'scale': 'minute',
          'suffix': '_mint',
          'normalization': 'zscore',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'time',
      },
      'min2': {
        'dualprocess': None,
        'singleprocess': None,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'min3': {
        'dualprocess': None,
        'singleprocess': None,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'min4': {
        'dualprocess': None,
        'singleprocess': None,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'misn',
      },
      'misn': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'minute',
          'suffix': '_misn',
          'function': 'sin',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'mics': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'minute',
          'suffix': '_mics',
          'function': 'cos',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mics',
      },
      'mssn': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'minutesecond',
          'suffix': '_mssn',
          'function': 'sin',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'mscs': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'minutesecond',
          'suffix': '_mscs',
          'function': 'cos',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'scnd': {
        'dualprocess': self._process_time,
        'singleprocess': None,
        'postprocess': self._postprocess_time,
        'defaultparams': {
          'scale': 'second',
          'suffix': '_scnd',
          'normalization': 'zscore',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'time',
      },
      'scn2': {
        'dualprocess': None,
        'singleprocess': None,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'scsn': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'second',
          'suffix': '_scsn',
          'function': 'sin',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
      'sccs': {
        'dualprocess': self._process_tmsc,
        'singleprocess': None,
        'postprocess': self._postprocess_tmsc,
        'defaultparams': {
          'scale': 'second',
          'suffix': '_sccs',
          'function': 'cos',
        },
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'numeric',
        'labelctgy': 'tmsc',
      },
    })
    #bxc2 through bxc5 all share the _process_bxcx / _postprocess_bxcx pair
    #and all resolve to the 'nmbr' label category
    #NOTE(review): bxc2 is registered with NArowtype 'nonzeronumeric' while
    #bxc3 / bxc4 / bxc5 use 'positivenumeric' - confirm the difference is intended
    process_dict.update({
      'bxc2': {
        'dualprocess': self._process_bxcx,
        'singleprocess': None,
        'postprocess': self._postprocess_bxcx,
        'NArowtype': 'nonzeronumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
      'bxc3': {
        'dualprocess': self._process_bxcx,
        'singleprocess': None,
        'postprocess': self._postprocess_bxcx,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
      'bxc4': {
        'dualprocess': self._process_bxcx,
        'singleprocess': None,
        'postprocess': self._postprocess_bxcx,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
      'bxc5': {
        'dualprocess': self._process_bxcx,
        'singleprocess': None,
        'postprocess': self._postprocess_bxcx,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
    })
    #ntgr, ntg2, ntg3 carry identical entries: the ord3 process / postprocess / inverseprocess
    #functions, 'integer' NArowtype, 'singlct' MLinfilltype and the 'mnmx' label category
    #each key gets its own dictionary literal so the entries remain separate objects
    process_dict.update({
      'ntgr': {
        'dualprocess': self._process_ord3,
        'singleprocess': None,
        'postprocess': self._postprocess_ord3,
        'inverseprocess': self._inverseprocess_ord3,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'integer',
        'MLinfilltype': 'singlct',
        'labelctgy': 'mnmx',
      },
      'ntg2': {
        'dualprocess': self._process_ord3,
        'singleprocess': None,
        'postprocess': self._postprocess_ord3,
        'inverseprocess': self._inverseprocess_ord3,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'integer',
        'MLinfilltype': 'singlct',
        'labelctgy': 'mnmx',
      },
      'ntg3': {
        'dualprocess': self._process_ord3,
        'singleprocess': None,
        'postprocess': self._postprocess_ord3,
        'inverseprocess': self._inverseprocess_ord3,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'integer',
        'MLinfilltype': 'singlct',
        'labelctgy': 'mnmx',
      },
    })
    #pwrs and pwr2 share the _process_pwrs / _postprocess_pwrs pair
    #and both register _inverseprocess_pwr2 as their inversion function
    #pwr2 differs by a 'negvalues' : True default parameter and a 'nonzeronumeric' NArowtype
    process_dict.update({
      'pwrs': {
        'dualprocess': self._process_pwrs,
        'singleprocess': None,
        'postprocess': self._postprocess_pwrs,
        'inverseprocess': self._inverseprocess_pwr2,
        'info_retention': False,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'pwrs',
      },
      'pwr2': {
        'dualprocess': self._process_pwrs,
        'singleprocess': None,
        'postprocess': self._postprocess_pwrs,
        'inverseprocess': self._inverseprocess_pwr2,
        'info_retention': False,
        'defaultparams': {'negvalues': True},
        'NArowtype': 'nonzeronumeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'pwrs',
      },
    })
    #logarithm and square root categories
    #log0 and log1 carry identical entries (both with labelctgy 'log0')
    #logn and lgnm share the logn functions, lgnm resolving to the 'nmbr' label category
    #sqrt is the only entry here registered with a 'nonnegativenumeric' NArowtype
    process_dict.update({
      'log0': {
        'dualprocess': self._process_log0,
        'singleprocess': None,
        'postprocess': self._postprocess_log0,
        'inverseprocess': self._inverseprocess_log0,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'log0',
      },
      'log1': {
        'dualprocess': self._process_log0,
        'singleprocess': None,
        'postprocess': self._postprocess_log0,
        'inverseprocess': self._inverseprocess_log0,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'log0',
      },
      'logn': {
        'dualprocess': self._process_logn,
        'singleprocess': None,
        'postprocess': self._postprocess_logn,
        'inverseprocess': self._inverseprocess_logn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'logn',
      },
      'lgnm': {
        'dualprocess': self._process_logn,
        'singleprocess': None,
        'postprocess': self._postprocess_logn,
        'inverseprocess': self._inverseprocess_logn,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'nmbr',
      },
      'sqrt': {
        'dualprocess': self._process_sqrt,
        'singleprocess': None,
        'postprocess': self._postprocess_sqrt,
        'inverseprocess': self._inverseprocess_sqrt,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'nonnegativenumeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'sqrt',
      },
    })
    #addd, sbtr, mltp, divd, rais, absl each register their own
    #process / postprocess / inverseprocess functions and use their own key as labelctgy
    #all are 'numeric' for both NArowtype and MLinfilltype with inplace_option True
    #absl is the only one flagged info_retention False
    process_dict.update({
      'addd': {
        'dualprocess': self._process_addd,
        'singleprocess': None,
        'postprocess': self._postprocess_addd,
        'inverseprocess': self._inverseprocess_addd,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'addd',
      },
      'sbtr': {
        'dualprocess': self._process_sbtr,
        'singleprocess': None,
        'postprocess': self._postprocess_sbtr,
        'inverseprocess': self._inverseprocess_sbtr,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'sbtr',
      },
      'mltp': {
        'dualprocess': self._process_mltp,
        'singleprocess': None,
        'postprocess': self._postprocess_mltp,
        'inverseprocess': self._inverseprocess_mltp,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'mltp',
      },
      'divd': {
        'dualprocess': self._process_divd,
        'singleprocess': None,
        'postprocess': self._postprocess_divd,
        'inverseprocess': self._inverseprocess_divd,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'divd',
      },
      'rais': {
        'dualprocess': self._process_rais,
        'singleprocess': None,
        'postprocess': self._postprocess_rais,
        'inverseprocess': self._inverseprocess_rais,
        'info_retention': True,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'rais',
      },
      'absl': {
        'dualprocess': self._process_absl,
        'singleprocess': None,
        'postprocess': self._postprocess_absl,
        'inverseprocess': self._inverseprocess_absl,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'numeric',
        'labelctgy': 'absl',
      },
    })
    #bkt1 through bkt4 each register their own process / postprocess / inverseprocess
    #functions, with info_retention False and their own key as labelctgy
    #bkt1 and bkt2 use MLinfilltype 'multirt' and set no inplace_option
    #bkt3 and bkt4 use MLinfilltype 'singlct' and set inplace_option True
    process_dict.update({
      'bkt1': {
        'dualprocess': self._process_bkt1,
        'singleprocess': None,
        'postprocess': self._postprocess_bkt1,
        'inverseprocess': self._inverseprocess_bkt1,
        'info_retention': False,
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bkt1',
      },
      'bkt2': {
        'dualprocess': self._process_bkt2,
        'singleprocess': None,
        'postprocess': self._postprocess_bkt2,
        'inverseprocess': self._inverseprocess_bkt2,
        'info_retention': False,
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bkt2',
      },
      'bkt3': {
        'dualprocess': self._process_bkt3,
        'singleprocess': None,
        'postprocess': self._postprocess_bkt3,
        'inverseprocess': self._inverseprocess_bkt3,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bkt3',
      },
      'bkt4': {
        'dualprocess': self._process_bkt4,
        'singleprocess': None,
        'postprocess': self._postprocess_bkt4,
        'inverseprocess': self._inverseprocess_bkt4,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bkt4',
      },
    })
    #datetime categories registered with a singleprocess function only
    #(dualprocess and postprocess are None for every entry in this group)
    #wkdy, bshr, hldy use MLinfilltype 'binary' and their own key as labelctgy
    #wkds / wkdo share _process_wkds and mnts / mnto share _process_mnts,
    #the 's' variants resolving to labelctgy 'text' and the 'o' variants to 'ord3'
    process_dict.update({
      'wkdy': {
        'dualprocess': None,
        'singleprocess': self._process_wkdy,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'binary',
        'labelctgy': 'wkdy',
      },
      'bshr': {
        'dualprocess': None,
        'singleprocess': self._process_bshr,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'binary',
        'labelctgy': 'bshr',
      },
      'hldy': {
        'dualprocess': None,
        'singleprocess': self._process_hldy,
        'postprocess': None,
        'NArowtype': 'datetime',
        'MLinfilltype': 'binary',
        'labelctgy': 'hldy',
      },
      'wkds': {
        'dualprocess': None,
        'singleprocess': self._process_wkds,
        'postprocess': None,
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'singlct',
        'labelctgy': 'text',
      },
      'wkdo': {
        'dualprocess': None,
        'singleprocess': self._process_wkds,
        'postprocess': None,
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'singlct',
        'labelctgy': 'ord3',
      },
      'mnts': {
        'dualprocess': None,
        'singleprocess': self._process_mnts,
        'postprocess': None,
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'singlct',
        'labelctgy': 'text',
      },
      'mnto': {
        'dualprocess': None,
        'singleprocess': self._process_mnts,
        'postprocess': None,
        'inplace_option': True,
        'NArowtype': 'datetime',
        'MLinfilltype': 'singlct',
        'labelctgy': 'ord3',
      },
    })
    #bins / bint share the _process_bins functions (MLinfilltype 'multirt', labelctgy 'bins')
    #bsor / btor share the _process_bsor functions (MLinfilltype 'singlct', labelctgy 'bsor')
    #bint and btor differ from their base entries by a 'normalizedinput' : True default parameter
    process_dict.update({
      'bins': {
        'dualprocess': self._process_bins,
        'singleprocess': None,
        'postprocess': self._postprocess_bins,
        'inverseprocess': self._inverseprocess_bins,
        'info_retention': False,
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bins',
      },
      'bint': {
        'dualprocess': self._process_bins,
        'singleprocess': None,
        'postprocess': self._postprocess_bins,
        'inverseprocess': self._inverseprocess_bins,
        'info_retention': False,
        'defaultparams': {'normalizedinput': True},
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bins',
      },
      'bsor': {
        'dualprocess': self._process_bsor,
        'singleprocess': None,
        'postprocess': self._postprocess_bsor,
        'inverseprocess': self._inverseprocess_bsor,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bsor',
      },
      'btor': {
        'dualprocess': self._process_bsor,
        'singleprocess': None,
        'postprocess': self._postprocess_bsor,
        'inverseprocess': self._inverseprocess_bsor,
        'info_retention': False,
        'defaultparams': {'normalizedinput': True},
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bsor',
      },
    })
    #bnwd, bnwK, bnwM share the _process_bnwd functions (MLinfilltype 'multirt', labelctgy 'bnwd')
    #bnwo, bnKo, bnMo share the _process_bnwo functions (MLinfilltype 'singlct', labelctgy 'bnwo')
    #the K and M variants pass their own suffix plus a 'width' default of 1000 and 1000000
    process_dict.update({
      'bnwd': {
        'dualprocess': self._process_bnwd,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwd,
        'inverseprocess': self._inverseprocess_bnwd,
        'info_retention': False,
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnwd',
      },
      'bnwK': {
        'dualprocess': self._process_bnwd,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwd,
        'inverseprocess': self._inverseprocess_bnwd,
        'info_retention': False,
        'defaultparams': {'suffix': '_bnwK', 'width': 1000},
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnwd',
      },
      'bnwM': {
        'dualprocess': self._process_bnwd,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwd,
        'inverseprocess': self._inverseprocess_bnwd,
        'info_retention': False,
        'defaultparams': {'suffix': '_bnwM', 'width': 1000000},
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnwd',
      },
      'bnwo': {
        'dualprocess': self._process_bnwo,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwo,
        'inverseprocess': self._inverseprocess_bnwo,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bnwo',
      },
      'bnKo': {
        'dualprocess': self._process_bnwo,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwo,
        'inverseprocess': self._inverseprocess_bnwo,
        'info_retention': False,
        'inplace_option': True,
        'defaultparams': {'suffix': '_bnKo', 'width': 1000},
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bnwo',
      },
      'bnMo': {
        'dualprocess': self._process_bnwo,
        'singleprocess': None,
        'postprocess': self._postprocess_bnwo,
        'inverseprocess': self._inverseprocess_bnwo,
        'info_retention': False,
        'inplace_option': True,
        'defaultparams': {'suffix': '_bnMo', 'width': 1000000},
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bnwo',
      },
    })
    #bnep, bne7, bne9 share the _process_bnep functions (MLinfilltype 'multirt', labelctgy 'bnep')
    #bneo, bn7o, bn9o share the _process_bneo functions (MLinfilltype 'singlct', labelctgy 'bneo')
    #the 7 and 9 variants pass their own suffix plus a 'bincount' default of 7 and 9
    process_dict.update({
      'bnep': {
        'dualprocess': self._process_bnep,
        'singleprocess': None,
        'postprocess': self._postprocess_bnep,
        'inverseprocess': self._inverseprocess_bnep,
        'info_retention': False,
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnep',
      },
      'bne7': {
        'dualprocess': self._process_bnep,
        'singleprocess': None,
        'postprocess': self._postprocess_bnep,
        'inverseprocess': self._inverseprocess_bnep,
        'info_retention': False,
        'defaultparams': {'suffix': '_bne7', 'bincount': 7},
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnep',
      },
      'bne9': {
        'dualprocess': self._process_bnep,
        'singleprocess': None,
        'postprocess': self._postprocess_bnep,
        'inverseprocess': self._inverseprocess_bnep,
        'info_retention': False,
        'defaultparams': {'suffix': '_bne9', 'bincount': 9},
        'NArowtype': 'numeric',
        'MLinfilltype': 'multirt',
        'labelctgy': 'bnep',
      },
      'bneo': {
        'dualprocess': self._process_bneo,
        'singleprocess': None,
        'postprocess': self._postprocess_bneo,
        'inverseprocess': self._inverseprocess_bneo,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bneo',
      },
      'bn7o': {
        'dualprocess': self._process_bneo,
        'singleprocess': None,
        'postprocess': self._postprocess_bneo,
        'inverseprocess': self._inverseprocess_bneo,
        'info_retention': False,
        'defaultparams': {'suffix': '_bn7o', 'bincount': 7},
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bneo',
      },
      'bn9o': {
        'dualprocess': self._process_bneo,
        'singleprocess': None,
        'postprocess': self._postprocess_bneo,
        'inverseprocess': self._inverseprocess_bneo,
        'info_retention': False,
        'defaultparams': {'suffix': '_bn9o', 'bincount': 9},
        'inplace_option': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'bneo',
      },
    })
    #tlbn registers its own functions with MLinfilltype 'concurrent_nmbr' and info_retention True
    #pwor and por2 share the _process_pwor / _postprocess_pwor pair
    #and both register _inverseprocess_por2 as their inversion function
    #por2 differs by a 'negvalues' : True default parameter and a 'nonzeronumeric' NArowtype
    process_dict.update({
      'tlbn': {
        'dualprocess': self._process_tlbn,
        'singleprocess': None,
        'postprocess': self._postprocess_tlbn,
        'inverseprocess': self._inverseprocess_tlbn,
        'info_retention': True,
        'NArowtype': 'numeric',
        'MLinfilltype': 'concurrent_nmbr',
        'labelctgy': 'tlbn',
      },
      'pwor': {
        'dualprocess': self._process_pwor,
        'singleprocess': None,
        'postprocess': self._postprocess_pwor,
        'inverseprocess': self._inverseprocess_por2,
        'info_retention': False,
        'inplace_option': True,
        'NArowtype': 'positivenumeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'pwor',
      },
      'por2': {
        'dualprocess': self._process_pwor,
        'singleprocess': None,
        'postprocess': self._postprocess_pwor,
        'inverseprocess': self._inverseprocess_por2,
        'info_retention': False,
        'inplace_option': True,
        'defaultparams': {'negvalues': True},
        'NArowtype': 'nonzeronumeric',
        'MLinfilltype': 'singlct',
        'labelctgy': 'pwor',
      },
    })
    process_dict.update({'por3' : {'dualprocess' : self._process_pwor, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_pwor, \
                                  'inverseprocess' : self._inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'negvalues' : True}, \
                                  'NArowtype' : 'nonzeronumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bkb3' : {'dualprocess' : self._process_bkt3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bkt3, \
                                  'inverseprocess' : self._inverseprocess_bkt3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bkb4' : {'dualprocess' : self._process_bkt4, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bkt4, \
                                  'inverseprocess' : self._inverseprocess_bkt4, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bsbn' : {'dualprocess' : self._process_bsor, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bsor, \
                                  'inverseprocess' : self._inverseprocess_bsor, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bnwb' : {'dualprocess' : self._process_bnwo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bnwo, \
                                  'inverseprocess' : self._inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bnKb' : {'dualprocess' : self._process_bnwo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bnwo, \
                                  'inverseprocess' : self._inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnKo', 'width':1000}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bnMb' : {'dualprocess' : self._process_bnwo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bnwo, \
                                  'inverseprocess' : self._inverseprocess_bnwo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bnMo', 'width':1000000}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bneb' : {'dualprocess' : self._process_bneo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bneo, \
                                  'inverseprocess' : self._inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bn7b' : {'dualprocess' : self._process_bneo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bneo, \
                                  'inverseprocess' : self._inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn7o', 'bincount':7}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'bn9b' : {'dualprocess' : self._process_bneo, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bneo, \
                                  'inverseprocess' : self._inverseprocess_bneo, \
                                  'info_retention' : False, \
                                  'defaultparams' : {'suffix':'_bn9o', 'bincount':9}, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'pwbn' : {'dualprocess' : self._process_pwor, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_pwor, \
                                  'inverseprocess' : self._inverseprocess_por2, \
                                  'info_retention' : False, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'DPn3' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPnb'}})
    process_dict.update({'DPnb' : {'dualprocess' : self._process_DPnb, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPnb, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : False, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPnb'}})
    process_dict.update({'DPm2' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPmm'}})
    process_dict.update({'DPmm' : {'dualprocess' : self._process_DPmm, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPmm, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPmm'}})
    process_dict.update({'DPrt' : {'dualprocess' : self._process_DPrt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPrt, \
                                  'inverseprocess' : self._inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DPrt'}})
    process_dict.update({'DLn3' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLnb'}})
    process_dict.update({'DLnb' : {'dualprocess' : self._process_DPnb, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPnb, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLnb'}})
    process_dict.update({'DLm2' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLmm'}})
    process_dict.update({'DLmm' : {'dualprocess' : self._process_DPmm, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPmm, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLmm'}})
    process_dict.update({'DLrt' : {'dualprocess' : self._process_DPrt, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPrt, \
                                  'inverseprocess' : self._inverseprocess_retn, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'noisedistribution' : 'laplace'}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'DLrt'}})
    process_dict.update({'DPb2' : {'dualprocess' : self._process_binary, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_binary, \
                                  'inverseprocess' : self._inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'DPbn'}})
    process_dict.update({'DPbn' : {'dualprocess' : self._process_DPbn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPbn, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'DPbn'}})
    process_dict.update({'DPo4' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'DPod'}})
    process_dict.update({'DPod' : {'dualprocess' : self._process_DPod, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPod, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'DPod'}})
    process_dict.update({'DPo5' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DPo2' : {'dualprocess' : self._process_DPod, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPod, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DPoh' : {'dualprocess' : self._process_DPod, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPod, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'onht'}})
    process_dict.update({'DP10' : {'dualprocess' : self._process_DPod, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPod, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'DPo6' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'DPo3' : {'dualprocess' : self._process_DPod, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_DPod, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'qbt1' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_qbt1, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_qbt1, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'qbt1'}})
    process_dict.update({'qbt2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_qbt1, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_qbt1, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix' : 'qbt2', \
                                                     'integer_bits' : 15, \
                                                     'fractional_bits' : 0}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'qbt2'}})
    process_dict.update({'qbt3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_qbt1, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_qbt1, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix' : 'qbt3', \
                                                     'sign_bit' : False, \
                                                     'fractional_bits' : 13}, \
                                  'NArowtype' : 'nonnegativenumeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'qbt3'}})
    process_dict.update({'qbt4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_qbt1, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_qbt1, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix' : 'qbt4', \
                                                     'sign_bit' : False, \
                                                     'integer_bits' : 16, \
                                                     'fractional_bits' : 0}, \
                                  'NArowtype' : 'nonnegativenumeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'qbt4'}})
    process_dict.update({'nmqb' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'qbt1'}})
    process_dict.update({'nmq2' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'mmqb' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'qbt3'}})
    process_dict.update({'mmq2' : {'dualprocess' : self._process_mnmx, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mnmx, \
                                  'inverseprocess' : self._inverseprocess_mnmx, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mnmx'}})
    process_dict.update({'NArw' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_NArw, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr2' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_NArw, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_NArw, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_NArw, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'nonnegativenumeric', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'NAr5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_NArw, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'boolexclude', \
                                  'labelctgy' : 'NArw'}})
    process_dict.update({'null' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_null, \
                                  'postprocess' : None, \
                                  'inplace_option' : False, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : None}})
    process_dict.update({'copy' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_copy, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'copy'}})
    process_dict.update({'excl' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_excl, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'totalexclude', \
                                  'labelctgy' : 'excl'}})
    process_dict.update({'exc2' : {'dualprocess' : self._process_exc2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc2, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc3' : {'dualprocess' : self._process_exc2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc2, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc4' : {'dualprocess' : self._process_exc2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc2, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc5' : {'dualprocess' : self._process_exc5, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc5, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'exc5'}})
    process_dict.update({'exc6' : {'dualprocess' : self._process_exc2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc2, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'exc7' : {'dualprocess' : self._process_exc5, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc5, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'exc5'}})
    process_dict.update({'exc8' : {'dualprocess' : self._process_exc5, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc5, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'integer', \
                                  'labelctgy' : 'exc5'}})
    process_dict.update({'exc9' : {'dualprocess' : self._process_exc5, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc5, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'integer', \
                                  'MLinfilltype' : 'integer', \
                                  'labelctgy' : 'exc5'}})
    process_dict.update({'shfl' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_shfl, \
                                  'postprocess' : None, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'shfl'}})
    process_dict.update({'nmbd' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'101d' : {'dualprocess' : self._process_1010, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_1010, \
                                  'inverseprocess' : self._inverseprocess_1010, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : '1010', \
                                  'labelctgy' : '1010'}})
    process_dict.update({'ordd' : {'dualprocess' : self._process_ord3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ord3, \
                                  'inverseprocess' : self._inverseprocess_ord3, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ord3'}})
    process_dict.update({'texd' : {'dualprocess' : self._process_text, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_text, \
                                  'inverseprocess' : self._inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'bnrd' : {'dualprocess' : self._process_binary, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_binary, \
                                  'inverseprocess' : self._inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'binary', \
                                  'labelctgy' : 'bnry'}})
    process_dict.update({'datd' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mdsn'}})
    process_dict.update({'nuld' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_null, \
                                  'postprocess' : None, \
                                  'inplace_option' : False, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : None}})
    process_dict.update({'lbnm' : {'dualprocess' : self._process_exc2, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_exc2, \
                                  'inverseprocess' : self._inverseprocess_UPCS, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'exc2'}})
    process_dict.update({'lbnb' : {'dualprocess' : self._process_numerical, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_numerical, \
                                  'inverseprocess' : self._inverseprocess_nmbr, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'nmbr'}})
    process_dict.update({'lb10' : {'dualprocess' : self._process_text, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_text, \
                                  'inverseprocess' : self._inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'lbor' : {'dualprocess' : self._process_ordl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ordl, \
                                  'inverseprocess' : self._inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'lbos' : {'dualprocess' : self._process_ordl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_ordl, \
                                  'inverseprocess' : self._inverseprocess_ordl, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'ordl'}})
    process_dict.update({'lbte' : {'dualprocess' : self._process_text, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_text, \
                                  'inverseprocess' : self._inverseprocess_text, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'text'}})
    process_dict.update({'lbbn' : {'dualprocess' : self._process_binary, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_binary, \
                                  'inverseprocess' : self._inverseprocess_bnry, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'multirt', \
                                  'labelctgy' : 'bnry'}})
    process_dict.update({'lbsm' : {'dualprocess' : self._process_smth, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_smth, \
                                  'inverseprocess' : self._inverseprocess_smth, \
                                  'info_retention' : True, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'smth'}})
    process_dict.update({'lbfs' : {'dualprocess' : self._process_smth, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_smth, \
                                  'inverseprocess' : self._inverseprocess_smth, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'LSfit' : True}, \
                                  'NArowtype' : 'justNaN', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'smth'}})
    process_dict.update({'lbda' : {'dualprocess' : None, \
                                  'singleprocess' : None, \
                                  'postprocess' : None, \
                                  'NArowtype' : 'datetime', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'mdsn'}})
    process_dict.update({'lgnr' : {'dualprocess' : self._process_absl, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_absl, \
                                  'inverseprocess' : self._inverseprocess_absl, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'nonzeronumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'qbt5'}})
    process_dict.update({'lgn2' : {'dualprocess' : self._process_logn, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_logn, \
                                  'inverseprocess' : self._inverseprocess_logn, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'NArowtype' : 'positivenumeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'logn'}})
    process_dict.update({'qbt5' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_qbt1, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_qbt1, \
                                  'info_retention' : True, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'suffix' : 'lgnr',
                                                          'integer_bits' : 4,
                                                          'fractional_bits' : 3,
                                                          'sign_bit' : True}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'qbt1'}})
    process_dict.update({'sgn3' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_copy, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'suffix' : '_'}, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'copy'}})
    process_dict.update({'sgn4' : {'dualprocess' : None, \
                                  'singleprocess' : self._process_copy, \
                                  'postprocess' : None, \
                                  'inverseprocess' : self._inverseprocess_excl, \
                                  'info_retention' : True, \
                                  'defaultparams' : {'suffix' : '_'}, \
                                  'NArowtype' : 'exclude', \
                                  'MLinfilltype' : 'exclude', \
                                  'labelctgy' : 'copy'}})
    process_dict.update({'sgn1' : {'dualprocess' : self._process_mltp, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_mltp, \
                                  'inverseprocess' : self._inverseprocess_mltp, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'multiply' : -1}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'numeric', \
                                  'labelctgy' : 'mltp'}})
    process_dict.update({'sgn2' : {'dualprocess' : self._process_bkt3, \
                                  'singleprocess' : None, \
                                  'postprocess' : self._postprocess_bkt3, \
                                  'inverseprocess' : self._inverseprocess_bkt3, \
                                  'info_retention' : False, \
                                  'inplace_option' : True, \
                                  'defaultparams' : {'buckets' : [0]}, \
                                  'NArowtype' : 'numeric', \
                                  'MLinfilltype' : 'singlct', \
                                  'labelctgy' : 'bkt3'}})

    return process_dict

  def _processfamily(self, df_train, df_test, column, category, origcategory, process_dict, \
                    transform_dict, postprocess_dict, assign_param):
    '''
    #master processing function applied to a column as automunge loops through
    #the columns. Walks the upstream family primitives that transform_dict
    #holds for the received category and applies each populated entry to column.
    
    #the primitives are applied in order of
    #siblings, cousins, parents, auntsuncles
    #where siblings and parents entries are passed to _processparent
    #and cousins and auntsuncles entries are passed to _processcousin
    
    #after the primitives are applied the source column is flagged based on
    #the inplaceperformed result returned by the last entry that was applied:
    #- replacement primitives (parents / auntsuncles) populated and no inplace:
    #  a column found in postprocess_dict['column_dict'] is given
    #  deletecolumn = True, any other column is logged (once) in
    #  postprocess_dict['orig_noinplace']
    #- inplace performed: a column found in postprocess_dict['column_dict']
    #  is given deletecolumn = 'inplace'
    
    #returns df_train, df_test, postprocess_dict
    '''

    family = transform_dict[category]

    #the final upstream transform is the one elligible for an inplace operation,
    #which is the last auntsuncles entry when auntsuncles is populated,
    #otherwise the last parents entry (stays False when neither is populated)
    final_upstream = False
    if len(family['auntsuncles']) > 0:
      final_upstream = family['auntsuncles'][-1]
    elif len(family['parents']) > 0:
      final_upstream = family['parents'][-1]

    #(primitive, name of the method applying its entries) in order of application
    #siblings: with downstream, supplemental
    #cousins: no downstream, supplemental
    #parents: with downstream, with replacement
    #auntsuncles: no downstream, with replacement
    stages = (('siblings', '_processparent'),
              ('cousins', '_processcousin'),
              ('parents', '_processparent'),
              ('auntsuncles', '_processcousin'))

    inplaceperformed = False

    for primitive, handler_name in stages:
      for entry in family[primitive]:

        #None entries are skipped (placeholder such as for validation of primitive entry)
        if entry is None:
          continue

        #the method is looked up only once there is an entry to apply
        handler = getattr(self, handler_name)

        df_train, df_test, postprocess_dict, inplaceperformed = handler(
          df_train, df_test, column, entry, origcategory, final_upstream,
          process_dict, transform_dict, postprocess_dict, assign_param)

    #if replacement transformations were performed then mark column for deletion
    #(circle of life)
    has_replacement = \
    len(family['auntsuncles']) + len(family['parents']) > 0

    if has_replacement and inplaceperformed is False:
      #columns logged in column_dict are downstream generations,
      #anything else is recorded in the orig_noinplace list
      column_dict = postprocess_dict['column_dict']
      if column in column_dict:
        column_dict[column]['deletecolumn'] = True
      elif column not in postprocess_dict['orig_noinplace']:
        postprocess_dict['orig_noinplace'].append(column)

    elif inplaceperformed is True:
      column_dict = postprocess_dict['column_dict']
      if column in column_dict:
        column_dict[column]['deletecolumn'] = 'inplace'

    return df_train, df_test, postprocess_dict

  def _circleoflife(self, df_train, df_test, column, category, origcategory, process_dict, \
                    transform_dict, postprocess_dict, templist1):
    '''
    #deletes source columns for cases where the family primitives included
    #replacement, with maintenance of the associated columnslist entries
    #logged in postprocess_dict['column_dict'].
    
    #templist1 is the list of df_train columns before processfamily, any
    #df_train column not found in templist1 is treated as a new column
    
    #steps performed:
    #1) when the category has parents or auntsuncles populated and column is
    #   logged in postprocess_dict['orig_noinplace'], column is deleted from
    #   df_train and df_test
    #2) columns with deletecolumn == 'inplace' found in the columnslist of one
    #   of the new columns are struck from the columnslist entries of the
    #   columns in that list
    #3) new columns with deletecolumn is True are struck from the columnslist
    #   entries of the new columns and then deleted from df_train and df_test
    
    #returns df_train, df_test, postprocess_dict
    '''

    #first generation: delete the original column if it was subject to
    #replacement transformations that were not performed inplace
    family = transform_dict[category]
    replacement_count = len(family['auntsuncles']) + len(family['parents'])

    if replacement_count > 0 and column in postprocess_dict['orig_noinplace']:
      del df_train[column]
      del df_test[column]

    #downstream generations: everything below only concerns columns that
    #were added since templist1 was recorded
    newcolumns = set(df_train) - set(templist1)

    if len(newcolumns) == 0:
      return df_train, df_test, postprocess_dict

    column_dict = postprocess_dict['column_dict']

    #this one is for columns replaced as part of inplace operation
    #the columnslist is read from one of the new columns and iterated as
    #a copy since the columnslist entries are edited along the way
    anewcolumn = next(iter(newcolumns))
    columnslist_snapshot = list(column_dict[anewcolumn]['columnslist'])

    for replaced in columnslist_snapshot:
      if column_dict[replaced]['deletecolumn'] != 'inplace':
        continue
      for member in columnslist_snapshot:
        member_columnslist = column_dict[member]['columnslist']
        if replaced in member_columnslist:
          member_columnslist.remove(replaced)

    #this one is for columns we manually delete
    for newcolumn in newcolumns:
      if column_dict[newcolumn]['deletecolumn'] is not True:
        continue

      for member in newcolumns:
        member_columnslist = column_dict[member]['columnslist']
        if newcolumn in member_columnslist:
          member_columnslist.remove(newcolumn)

      #now delete the column
      #note this only works on single column parents, multicolumn parents would
      #need to incorporate the categorylist (future extension)
      if newcolumn in df_train.columns:
        del df_train[newcolumn]
        del df_test[newcolumn]

    return df_train, df_test, postprocess_dict

  def _dictupdate(self, column, column_dict, postprocess_dict):
    '''
    #merges a column_dict returned from a processing function into
    #postprocess_dict['column_dict'] and returns the postprocess_dict.
    
    #column is the name of the column prior to the application of processing,
    #the names of the columns returned from processing are the keys of column_dict
    
    #for each entry of column_dict:
    #- when column is itself logged in postprocess_dict['column_dict'] with an
    #  origcolumn other than column, the origcolumn and origcategory logged
    #  for column are carried through to the entry (parent to child)
    #- for every logged column sharing the entry's origcolumn the two
    #  columnslist are combined, with the combined list saved to both
    #  (the same list object is assigned to both sides)
    
    #finally the column_dict entries are added to postprocess_dict['column_dict']
    '''

    logged_columns = postprocess_dict['column_dict']

    for entry in column_dict.values():

      #first address carry-through of origcolumn and origcategory from parent to child
      #which applies when column is not its own origcolumn in postprocess_dict
      if column in logged_columns:
        source_entry = logged_columns[column]
        if source_entry['origcolumn'] != column:
          entry['origcolumn'] = source_entry['origcolumn']
          entry['origcategory'] = source_entry['origcategory']

      #then combine the columnslist with those of the logged columns
      #originating from the same origcolumn
      for logged_entry in logged_columns.values():
        if logged_entry['origcolumn'] != entry['origcolumn']:
          continue

        combined = list(set(logged_entry['columnslist']) | set(entry['columnslist']))

        #the combined list is applied to both the logged entry and the new entry
        logged_entry['columnslist'] = combined
        entry['columnslist'] = combined

    #now append column_dict onto postprocess_dict
    logged_columns.update(column_dict)

    return postprocess_dict

  def _processcousin(self, df_train, df_test, column, cousin, origcategory, final_upstream, \
                     process_dict, transform_dict, postprocess_dict, assign_param):
    '''
    #applies the transformation category "cousin" to column without any
    #downstream derivations. Used for the cousins primitive (no replacement of
    #source column) and also for the auntsuncles primitive, in which case the
    #caller follows with a deletion of the original column.
    #(per the original notes the same function also serves the niecesnephews
    #primitive downstream of parents or siblings)
    #note the processing functions are accessed through the process_dict
    
    #a dualprocess function is called once with both df_train and df_test,
    #a singleprocess function is called once for df_train and once for df_test
    #with the same params. The returned column_dict entries are then passed to
    #_dictupdate to update postprocess_dict.
    
    #inplace is only performed when cousin is the final_upstream transform and
    #the process_dict entry has inplace_option as True, unless the user passed
    #inplace as False through assignparam. In any other case a user passed
    #inplace of True is reset to False.

    #reminder the format of assign_param is e.g.
    #assignparam = {'splt' : {'column1' : {'minsplit' : 4}}, \
    #               'spl2' : {'column2' : {'minsplit' : 3}}}
    
    #returns df_train, df_test, postprocess_dict, inplaceperformed
    '''

    #for checking type of processdict entries of custom externally defined transformation functions
    def _check_function():
      return

    inplacecandidate = True if final_upstream == cousin else False

    entry = process_dict[cousin]

    params = self._grab_params(assign_param, cousin, column, entry, postprocess_dict)

    def _is_function(key):
      #a processdict entry qualifies when populated with either a method
      #of this class or an externally defined function
      return key in entry \
      and isinstance(entry[key], (type(self._processcousin), type(_check_function)))

    def _settle_inplace():
      #returns whether the transform will be performed inplace
      #and aligns the inplace entry of params with that result
      if not inplacecandidate:
        #user cannot manually specify inplace by design
        if 'inplace' in params and params['inplace'] is True:
          params['inplace'] = False
        return False

      if 'inplace_option' not in entry or entry['inplace_option'] is not True:
        return False

      #inplace applied by default, only a user specified False turns it off
      if 'inplace' not in params or params['inplace'] != False:
        params['inplace'] = True
        return True

      return False

    #note: an entry with neither dualprocess nor singleprocess populated with a
    #function leaves column_dict_list unassigned for the loop further below
    inplaceperformed = False

    if _is_function('dualprocess'):
      #dual process functions receive train and test data in a single call
      inplaceperformed = _settle_inplace()

      df_train, df_test, column_dict_list = \
      entry['dualprocess'](df_train, df_test, column, origcategory, \
                           postprocess_dict, params)

    elif _is_function('singleprocess'):
      #single process functions are applied to train and test seperately
      inplaceperformed = _settle_inplace()

      single_function = entry['singleprocess']

      df_train, column_dict_list = \
      single_function(df_train, column, origcategory, postprocess_dict, params)

      #the column_dict_list returned for the test data is not used
      df_test, _ = \
      single_function(df_test, column, origcategory, postprocess_dict, params)

    #update the columnslist and normalization_dict for both column_dict and postprocess_dict
    for column_dict in column_dict_list:
      postprocess_dict = self._dictupdate(column, column_dict, postprocess_dict)

    return df_train, df_test, postprocess_dict, inplaceperformed

  def _processparent(self, df_train, df_test, column, parent, origcategory, final_upstream, \
                    process_dict, transform_dict, postprocess_dict, assign_param):
    '''
    #parent is one of the primitives for processfamily function, and it involves
    #transformations with downstream derivations with replacement of source column
    #although this same funciton can be used with the siblinga primitive
    #by not following with a deletion of original column, also this funciton can be
    #used on the children primitive downstream of parents or siblings, allowing
    #the children to have children of their own, you know, grandchildren and stuff.
    #note the processing functions are accessed through the process_dict
    
    #reminder the format of assign_param is e.g.
    #assignparam = {'splt' : {'column1' : {'minsplit' : 4}}, \
    #               'spl2' : {'column2' : {'minsplit' : 3}}}
    
    #we want to apply in order of
    #upstream process, niecesnephews, friends, children, coworkers
    '''

    #for checking type of processdict entries of custom externally defined transformation functions
    def _check_function():
      return

    #upstream process
    
    inplaceperformed = False
    inplacecandidate = False
    if final_upstream == parent:
      inplacecandidate = True
    
    params = self._grab_params(assign_param, parent, column, process_dict[parent], postprocess_dict)
    
    #if this is a dual process function
    if 'dualprocess' in process_dict[parent] \
    and (isinstance(process_dict[parent]['dualprocess'], type(self._processparent)) \
    or isinstance(process_dict[parent]['dualprocess'], type(_check_function))):
      
      if inplacecandidate is True:
        if 'inplace_option' in process_dict[parent]:
          if process_dict[parent]['inplace_option'] is True:
            if 'inplace' not in params:
              inplaceperformed = True
              params.update({'inplace' : True})
            elif ('inplace' in params and params['inplace'] != False):
              inplaceperformed = True
              params.update({'inplace' : True})
            else:
              inplaceperformed = False
      else:
        #user cannot manually specify inplace by design
        if ('inplace' in params and params['inplace'] is True):
          inplaceperformed = False
          params.update({'inplace' : False})

      df_train, df_test, column_dict_list = \
      process_dict[parent]['dualprocess'](df_train, df_test, column, origcategory, \
                                          postprocess_dict, params)

    #else if this is a single process function process train and test seperately
    elif 'singleprocess' in process_dict[parent] \
    and (isinstance(process_dict[parent]['singleprocess'], type(self._processparent)) \
    or isinstance(process_dict[parent]['singleprocess'], type(_check_function))):
      
      if inplacecandidate is True:
        if 'inplace_option' in process_dict[parent]:
          if process_dict[parent]['inplace_option'] is True:
            if 'inplace' not in params:
              inplaceperformed = True
              params.update({'inplace' : True})
            elif ('inplace' in params and params['inplace'] != False):
              inplaceperformed = True
              params.update({'inplace' : True})
            else:
              inplaceperformed = False
      else:
        #user cannot manually specify inplace by design
        if ('inplace' in params and params['inplace'] is True):
          inplaceperformed = False
          params.update({'inplace' : False})

      df_train, column_dict_list =  \
      process_dict[parent]['singleprocess'](df_train, column, origcategory, \
                                          postprocess_dict, params)

      df_test, _1 = \
      process_dict[parent]['singleprocess'](df_test, column, origcategory, \
                                          postprocess_dict, params)

    #update the columnslist and normalization_dict for both column_dict and postprocess_dict
    for column_dict in column_dict_list:
      postprocess_dict = self._dictupdate(column, column_dict, postprocess_dict)

      #note this only works for single column source, as currently implemented
      #multicolumn transforms (such as text or bins) cannot serve as parents
      #a future extension may check the categorylist from column_dict for 
      #purposes of transforms applied to multicolumn source
      parentcolumn = list(column_dict.keys())[0]

    #if transform_dict[parent] != None:

    #initialize in case no downstream performed
    parent_inplaceperformed = False
    
    #process any children
    
    #final upstream transform from parents or auntsuncles is elligible for inplace
    #as long as no supplement transforms were applied
    final_downstream = False
    if len(transform_dict[parent]['coworkers']) == 0:
      if len(transform_dict[parent]['children']) > 0:
        final_downstream = transform_dict[parent]['children'][-1]
    else:
      if len(transform_dict[parent]['coworkers']) > 0:
        final_downstream = transform_dict[parent]['coworkers'][-1]

    #process any niecesnephews
    #note the function applied is comparable to processsibling, just a different
    #parent column
    for niecenephew in transform_dict[parent]['niecesnephews']:

      if niecenephew != None:

        #process the niecenephew
        #note the function applied is processparent (using recursion)
        #parent column
        df_train, df_test, postprocess_dict, parent_inplaceperformed = \
        self._processparent(df_train, df_test, parentcolumn, niecenephew, origcategory, final_downstream, \
                           process_dict, transform_dict, postprocess_dict, assign_param)

    #process any friends
    for friend in transform_dict[parent]['friends']:

      if friend != None:

        #process the friend
        #note the function applied is processcousin
        df_train, df_test, postprocess_dict, parent_inplaceperformed = \
        self._processcousin(df_train, df_test, parentcolumn, friend, origcategory, final_downstream, \
                           process_dict, transform_dict, postprocess_dict, assign_param)
    
    for child in transform_dict[parent]['children']:

      if child != None:

        #process the child
        #note the function applied is processparent (using recursion)
        #parent column
        df_train, df_test, postprocess_dict, parent_inplaceperformed = \
        self._processparent(df_train, df_test, parentcolumn, child, origcategory, final_downstream, \
                           process_dict, transform_dict, postprocess_dict, assign_param)

    #process any coworkers
    for coworker in transform_dict[parent]['coworkers']:

      if coworker != None:

        #process the coworker
        #note the function applied is processcousin
        df_train, df_test, postprocess_dict, parent_inplaceperformed = \
        self._processcousin(df_train, df_test, parentcolumn, coworker, origcategory, final_downstream, \
                           process_dict, transform_dict, postprocess_dict, assign_param)

    #if we had replacement transformations performed then mark column for deletion
    #(circle of life)
    if len(transform_dict[parent]['children']) \
    + len(transform_dict[parent]['coworkers']) > 0 \
    and parent_inplaceperformed is False:
      #here we'll only address downstream generaitons
      if parentcolumn in postprocess_dict['column_dict']:
        postprocess_dict['column_dict'][parentcolumn]['deletecolumn'] = True
      else:
        if parentcolumn not in postprocess_dict['orig_noinplace']:
          postprocess_dict['orig_noinplace'].append(parentcolumn)
    elif parent_inplaceperformed is True:
      if parentcolumn in postprocess_dict['column_dict']:
        postprocess_dict['column_dict'][parentcolumn]['deletecolumn'] = 'inplace'

    return df_train, df_test, postprocess_dict, inplaceperformed

  def _df_copy_train(self, df_train, column, newcolumn, suffixoverlap_results = {}):
    """
    #performs a copy operation to add column to a df_train
    #Before any new columns added to df_train
    #checks that they are not already present in df_train
    #if so returns error message and logs in suffixoverlap_results
    """
    
    #test for overlap error
    if newcolumn in df_train.columns:
      
      print("*****************")
      print("Warning of suffix overlap error")
      print("When creating new column: ", newcolumn)
      print("The column was already found present in df_train headers.")
      print("")
      print("Some potential quick fixes for this error include:")
      print("- rename columns to integers before passing to automunge(.)")
      print("- strip underscores '_' from column header titles.")
      print("(convention is all suffix appenders include an underscore)")
      print("")
      print("Please note any updates to column headers will need to be carried through to assignment parameters.")
      print("*****************")
      print("")
      
      suffixoverlap_results.update({newcolumn : True})
      
    else:
      
      df_train[newcolumn] = df_train[column].copy()
      
      suffixoverlap_results.update({newcolumn : False})
    
    return df_train, suffixoverlap_results

  def _df_check_suffixoverlap(self, df_train, newcolumns, suffixoverlap_results = {}):
    """
    #checks that newcolumns list are not already present in df_train
    #logs in suffixoverlap_results
    """
    
    if not isinstance(newcolumns, list):
      newcolumns = [newcolumns]
    
    for newcolumn in newcolumns:
      
      if newcolumn in df_train.columns:
        
        print("*****************")
        print("Warning of suffix overlap error")
        print("When creating new column: ", newcolumn)
        print("The column was already found present in df_train headers.")
        print("")
        print("Some potential quick fixes for this error include:")
        print("- rename columns to integers before passing to automunge(.)")
        print("- strip underscores '_' from column header titles.")
        print("(convention is all suffix appenders include an underscore)")
        print("")
        print("Please note any updates to column headers will need to be carried through to assignment parameters.")
        print("*****************")
        print("")

        suffixoverlap_results.update({newcolumn : True})

      else:

        suffixoverlap_results.update({newcolumn : False})
        
    return suffixoverlap_results

  def _suffix_overlap_final_aggregation_and_printouts(self, postprocess_dict):
    """
    #Performs a final round of printouts in case of identified suffix overlap error
    #Also aggregates the validation results stored in column_dict
    #To a those returned in postprocess_dict['miscparameters_results']
    """
    
    #then at completion of automunge(.), aggregate the suffixoverlap results
    #and do an additional printout if any column overlap error to be sure user sees message
    for entry1 in postprocess_dict['column_dict']:
      for entry2 in postprocess_dict['column_dict'][entry1]['suffixoverlap_results']:
        if postprocess_dict['column_dict'][entry1]['suffixoverlap_results'][entry2] is True:
          
          print("*****************")
          print("Warning of suffix overlap error")
          print("When creating new column: ", entry2)
          print("The column was already found present in df_train headers.")
          print("")
          print("Some potential quick fixes for this error include:")
          print("- rename columns to integers before passing to automunge(.)")
          print("- strip underscores '_' from column header titles.")
          print("(convention is all suffix appenders include an underscore)")
          print("")
          print("Please note any updates to column headers will need to be carried through to assignment parameters.")
          print("*****************")
          print("")
      
      postprocess_dict['miscparameters_results']['suffixoverlap_results'].update(
      postprocess_dict['column_dict'][entry1]['suffixoverlap_results'])

    for entry1 in postprocess_dict['miscparameters_results']['PCA_suffixoverlap_results']:
      if postprocess_dict['miscparameters_results']['PCA_suffixoverlap_results'][entry1] is True:

          print("*****************")
          print("Warning of suffix overlap error")
          print("When creating PCA column: ", entry1)
          print("The column was already found present in df_train headers.")
          print("")
          print("Note that PCA returned columns are of form: PCAcol0")
          print("Where # is integer")
          print("This form of column header should be avoided in passed data.")
          print("")

    for entry1 in postprocess_dict['miscparameters_results']['Binary_suffixoverlap_results']:
      if postprocess_dict['miscparameters_results']['Binary_suffixoverlap_results'][entry1] is True:

          print("*****************")
          print("Warning of suffix overlap error")
          print("When creating Binary column: ", entry1)
          print("The column was already found present in df_train headers.")
          print("")
          print("Note that Binary returned columns are of form: Binary_1010_#")
          print("Where # is integer")
          print("This error might have occured if you passed data including column header 'Binary' to '1010' transform")
          print("This form of column header should be avoided in passed data.")
          print("")

    for entry1 in postprocess_dict['miscparameters_results']['excl_suffixoverlap_results']:
      if postprocess_dict['miscparameters_results']['excl_suffixoverlap_results'][entry1] is True:

          print("*****************")
          print("Warning of suffix overlap error")
          print("When removing '_excl' suffix for column: ", entry1)
          print("The column without suffix was already found present in df_train headers.")
          print("")
          
    return postprocess_dict

  def _process_NArw(self, df, column, category, postprocess_dict, params = {}):
    '''
    #singleprocess function (applied to one dataframe at a time)
    #adds a column with header column + '_NArw' populated with the result of
    #self._getNArows(df, column, category, postprocess_dict) cast to int8,
    #intended as a marker of rows with missing or improperly formatted
    #entries in the source column
    #
    #the share of marked rows is recorded in the normalization_dict as 'pct_NArw'
    #params is accepted for signature consistency and is not inspected here
    #
    #returns df, column_dict_list
    '''
    
    NArw_column = column + '_NArw'
    
    #log whether the new header collides with an existing header
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, NArw_column, {})

    #populate the marker column
    df[NArw_column] = self._getNArows(df, column, category, postprocess_dict)

    #8-bit (1 byte) integers for memory savings
    df[NArw_column] = df[NArw_column].astype(np.int8)

    #list of returned columns
    nmbrcolumns = [NArw_column]
    
    #ratio of marked rows, for drift report
    pct_NArw = df[NArw_column].sum() / df[NArw_column].shape[0]

    NArwnormalization_dict = {NArw_column : {'pct_NArw':pct_NArw}}

    #one column_dict per returned column
    column_dict_list = [
      { nc : {'category' : 'NArw', \
              'origcategory' : category, \
              'normalization_dict' : NArwnormalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : nmbrcolumns, \
              'categorylist' : [nc], \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'suffixoverlap_results' : suffixoverlap_results, \
              'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]

    return df, column_dict_list

  def _process_numerical(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_numerical(mdf_train, mdf_test, column, category)
    #function to normalize data to mean of 0 and standard deviation of 1 \
    #z score normalization) 
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #replaces missing or improperly formatted data with mean of remaining values
    #returns same dataframes with new column of name column + '_nmbr'
    #note this is a "dualprocess" function since is applied to both dataframes
    #expect this approach works better when the numerical distribution is thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    #offset is just an added constant applied after multiplier
    if 'offset' in params:
      offset = params['offset']
    else:
      offset = 0
    
    #multiplier scales the set by multiplication prior to offset
    if 'multiplier' in params:
      multiplier = params['multiplier']
    else:
      multiplier = 1
    
    #cap can be passed as True for max of training data or as a specific value prior to normalization, False for no cap
    if 'cap' in params:
      cap = params['cap']
    else:
      cap = False
      
    #floor can be passed as True for min of training data or as a specific value prior to normalization, False for no floor
    if 'floor' in params:
      floor = params['floor']
    else:
      floor = False

    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_nmbr', suffixoverlap_results)

      mdf_test[column + '_nmbr'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_nmbr', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_nmbr'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_nmbr'}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[column + '_nmbr'] = pd.to_numeric(mdf_train[column + '_nmbr'], errors='coerce')
    mdf_test[column + '_nmbr'] = pd.to_numeric(mdf_test[column + '_nmbr'], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get maximum value of training column
    maximum = mdf_train[column + '_nmbr'].max()
    #get minimum value of training column
    minimum = mdf_train[column + '_nmbr'].min()
    
    #if cap < maximum, maximum = cap
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor application
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values in test > cap with cap
      mdf_train.loc[mdf_train[column + '_nmbr'] > cap, (column + '_nmbr')] \
      = cap
      
      mdf_test.loc[mdf_test[column + '_nmbr'] > cap, (column + '_nmbr')] \
      = cap
    
    if floor is not False:
      #replace values in test < floor with floor
      mdf_train.loc[mdf_train[column + '_nmbr'] < floor, (column + '_nmbr')] \
      = floor
      
      mdf_test.loc[mdf_test[column + '_nmbr'] < floor, (column + '_nmbr')] \
      = floor

    #get mean of training data
    mean = mdf_train[column + '_nmbr'].mean()
    if mean != mean:
      mean = 0

    #replace missing data with training set mean
    mdf_train[column + '_nmbr'] = mdf_train[column + '_nmbr'].fillna(mean)
    mdf_test[column + '_nmbr'] = mdf_test[column + '_nmbr'].fillna(mean)

    #subtract mean from column for both train and test
    mdf_train[column + '_nmbr'] = mdf_train[column + '_nmbr'] - mean
    mdf_test[column + '_nmbr'] = mdf_test[column + '_nmbr'] - mean

    #get standard deviation of training data
    std = mdf_train[column + '_nmbr'].std()
    
    #special case, if standard deviation is 0 we'll set it to 1 to avoid division by 0
    if std == 0:
      std = 1

    #divide column values by std for both training and test data
    #offset, multiplier are parameters that defaults to zero, one
    mdf_train[column + '_nmbr'] = mdf_train[column + '_nmbr'] / std * multiplier + offset
    mdf_test[column + '_nmbr'] = mdf_test[column + '_nmbr'] / std * multiplier + offset
    
#     #change data type for memory savings
#     mdf_train[column + '_nmbr'] = mdf_train[column + '_nmbr'].astype(np.float32)
#     mdf_test[column + '_nmbr'] = mdf_test[column + '_nmbr'].astype(np.float32)

    #create list of columns
    nmbrcolumns = [column + '_nmbr']

    nmbrnormalization_dict = {column + '_nmbr' : {'mean' : mean, 'std' : std, \
                                                  'max' : maximum, 'min' : minimum, \
                                                  'offset' : offset, 'multiplier': multiplier, \
                                                  'cap' : cap, 'floor' : floor}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'nmbr', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list
  
  def _process_dxdt(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_dxdt(df, column, category, postprocess_dict)
    #function to translate a continues variable into a bounded variable
    #by taking delta of row from preceding row
    #assumes the rows are not shuffled and represent a continuous funciton 
    #with consistent time steps
    
    #for missing values, uses adjacent cell infill as default
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    if 'periods' in params:
      periods = params['periods']
    else:
      periods = 1
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, column + '_dxdt', suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, column + '_dxdt', suffixoverlap_results)
      
      df.rename(columns = {column : column + '_dxdt'}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[column + '_dxdt'] = pd.to_numeric(df[column + '_dxdt'], errors='coerce')
    
    #apply ffill to replace NArows with value from adjacent cell in pre4ceding row
    df[column + '_dxdt'] = df[column + '_dxdt'].fillna(method='ffill')
    
    #we'll follow with a bfill just in case first row had a nan
    df[column + '_dxdt'] = df[column + '_dxdt'].fillna(method='bfill') 
    
    #subtract preceding row
    df[column + '_dxdt'] = df[column + '_dxdt'] - df[column + '_dxdt'].shift(periods = periods)
    
    #first row will have a nan so just one more backfill
    df[column + '_dxdt'] = df[column + '_dxdt'].fillna(method='bfill')
    
    #then one more infill with to address scenario when data wasn't numeric
    #get arbitrary cell value, if one is nan then all will be
    value = df[column + '_dxdt'][0]
    if value != value:
      value = 0

      df[column + '_dxdt'] = df[column + '_dxdt'].fillna(value)
    
    #create list of columns
    nmbrcolumns = [column + '_dxdt']

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to dxdt without the driftreport metrics for postmunge efficiency
    positiveratio = df[df[column + '_dxdt'] >= 0].shape[0] / df[column + '_dxdt'].shape[0]
    negativeratio = df[df[column + '_dxdt'] < 0].shape[0] / df[column + '_dxdt'].shape[0]
    zeroratio = df[df[column + '_dxdt'] == 0].shape[0] / df[column + '_dxdt'].shape[0]
    minimum = df[column + '_dxdt'].min()
    maximum = df[column + '_dxdt'].max()
    mean = df[column + '_dxdt'].mean()
    std = df[column + '_dxdt'].std()

    nmbrnormalization_dict = {column + '_dxdt' : {'positiveratio' : positiveratio, \
                                                  'negativeratio' : negativeratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'periods' : periods}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'dxdt', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def _process_dxd2(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_dxd2(df, column, category, postprocess_dict)
    #function to translate a continues variable into a bounded variable
    #by taking delta of average of last two rows minus 
    #average of preceding two rows before that
    #should take a littel noise out of noisy data
    #assumes the rows are not shuffled and represent a continuous funciton 
    #with consistent time steps
    
    #for missing values, uses adjacent cell infill as default
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    if 'periods' in params:
      periods = params['periods']
    else:
      periods = 2
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, column + '_dxd2', suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, column + '_dxd2', suffixoverlap_results)
      
      df.rename(columns = {column : column + '_dxd2'}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[column + '_dxd2'] = pd.to_numeric(df[column + '_dxd2'], errors='coerce')
    
    #apply ffill to replace NArows with value from adjacent cell in pre4ceding row
    df[column + '_dxd2'] = df[column + '_dxd2'].fillna(method='ffill')
    
    #we'll follow with a bfill just in case first row had a nan
    df[column + '_dxd2'] = df[column + '_dxd2'].fillna(method='bfill')  
    
#     #we're going to take difference of average of last two rows with two rows preceding
#     df[column + '_dxd2'] = (df[column + '_dxd2'] + df[column + '_dxd2'].shift()) / 2 \
#                            - ((df[column + '_dxd2'].shift(periods=2) + df[column + '_dxd2'].shift(periods=3)) / 2)

    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, [column + '_temp1'], suffixoverlap_results)

    df[column + '_temp1'] = df[column + '_dxd2'].copy()
    # df_train['number7_temp3'] = df_train['number7'].copy()

    for i in range(periods-1):
      df[column + '_temp1'] = df[column + '_temp1'] + df[column + '_dxd2'].shift(periods = i+1)

    df[column + '_dxd2'] = (df[column + '_temp1'] - df[column + '_temp1'].shift(periods = periods)) / periods
    
    #first row will have a nan so just one more backfill
    df[column + '_dxd2'] = df[column + '_dxd2'].fillna(method='bfill')
    
    #then one more infill with to address scenario when data wasn't numeric
    #get arbitrary cell value, if one is nan then all will be
    value = df[column + '_dxd2'][0]
    if value != value:
      value = 0

      df[column + '_dxd2'] = df[column + '_dxd2'].fillna(value)
    
    del df[column + '_temp1']
    
    #create list of columns
    nmbrcolumns = [column + '_dxd2']

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to dxd2 without the driftreport metrics for postmunge efficiency
    positiveratio = df[df[column + '_dxd2'] >= 0].shape[0] / df[column + '_dxd2'].shape[0]
    negativeratio = df[df[column + '_dxd2'] < 0].shape[0] / df[column + '_dxd2'].shape[0]
    zeroratio = df[df[column + '_dxd2'] == 0].shape[0] / df[column + '_dxd2'].shape[0]
    minimum = df[column + '_dxd2'].min()
    maximum = df[column + '_dxd2'].max()
    mean = df[column + '_dxd2'].mean()
    std = df[column + '_dxd2'].std()
  
    nmbrnormalization_dict = {column + '_dxd2' : {'positiveratio' : positiveratio, \
                                                  'negativeratio' : negativeratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'periods' : periods}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'dxd2', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def _process_shft(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shft(df, column, category, postprocess_dict)
    #function to shift a sequential set forward by one or more time steps    
    #for missing values, uses adjacent cell infill as default
    #accepts parameter 'periods' for number of time steps, defaults to one
    #accepts parameter 'suffix' for column suffix appender
    #such as may be useful if applying this transform to the same column more than once
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    if 'periods' in params:
      periods = params['periods']
    else:
      periods = 1
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = 'shft'
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #apply ffill to replace NArows with value from adjacent cell in pre4ceding row
    df[shft_column] = df[shft_column].fillna(method='ffill')
    
    #we'll follow with a bfill just in case first row had a nan
    df[shft_column] = df[shft_column].fillna(method='bfill') 
    
    #shift from preceding row
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #first row will have a nan so just one more backfill
    df[shft_column] = df[shft_column].fillna(method='bfill')
    
    #then one more infill with to address scenario when data wasn't numeric
    #get arbitrary cell value, if one is nan then all will be
    value = df[shft_column][0]
    if value != value:
      value = 0

      df[shft_column] = df[shft_column].fillna(value)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate to dxdt without the driftreport metrics for postmunge efficiency
    positiveratio = df[df[shft_column] >= 0].shape[0] / df[shft_column].shape[0]
    negativeratio = df[df[shft_column] < 0].shape[0] / df[shft_column].shape[0]
    zeroratio = df[df[shft_column] == 0].shape[0] / df[shft_column].shape[0]
    minimum = df[shft_column].min()
    maximum = df[shft_column].max()
    mean = df[shft_column].mean()
    std = df[shft_column].std()

    nmbrnormalization_dict = {shft_column :      {'positiveratio' : positiveratio, \
                                                  'negativeratio' : negativeratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'periods' : periods, \
                                                  'suffix' : suffix}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shft', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list
  
  def _process_shf2(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shf2(df, column, category, postprocess_dict, params)
    #function to shift a sequential set forward by one or more time steps
    #entries are first coerced to numeric, with missing values receiving adjacent cell
    #infill (forward fill followed by backward fill) prior to the shift
    #rows vacated by the shift are then backward filled
    #if every entry is still NaN at that point (e.g. no numeric entries) the column is filled with 0
    #accepts parameter 'periods' for number of time steps, defaults to 2
    #accepts parameter 'suffix' for column suffix appender, defaults to 'shf2'
    #such as may be useful if applying this transform to the same column more than once
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns the dataframe with the column named column + '_' + suffix
    #and a list of column_dict entries, with driftreport metrics stored in normalization_dict
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters, each falling back to the default for this category
    inplace = params.get('inplace', False)
    periods = params.get('periods', 2)
    suffix = params.get('suffix', 'shf2')
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #apply ffill to replace NArows with value from adjacent cell in preceding row
    #(the ffill / bfill methods are used since fillna(method=...) is deprecated in pandas)
    df[shft_column] = df[shft_column].ffill()
    
    #we'll follow with a bfill just in case first row had a nan
    df[shft_column] = df[shft_column].bfill()
    
    #shift from preceding row
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #first row(s) will have a nan so just one more backfill
    df[shft_column] = df[shft_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #after the backfill a nan in the first row means every row is nan
    #checking the whole column avoids a label based lookup of row 0,
    #which raised KeyError when the index had no label 0
    if df[shft_column].isna().all():
      df[shft_column] = df[shft_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate without the driftreport metrics for postmunge efficiency
    shifted = df[shft_column]
    rowcount = shifted.shape[0]
    #note positiveratio counts entries >= 0, so zeros are included in it as well as in zeroratio
    positiveratio = int((shifted >= 0).sum()) / rowcount
    negativeratio = int((shifted < 0).sum()) / rowcount
    zeroratio = int((shifted == 0).sum()) / rowcount
    minimum = shifted.min()
    maximum = shifted.max()
    mean = shifted.mean()
    std = shifted.std()

    nmbrnormalization_dict = {shft_column :      {'positiveratio' : positiveratio, \
                                                  'negativeratio' : negativeratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'periods' : periods, \
                                                  'suffix' : suffix}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shf2', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list
  
  def _process_shf3(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_shf3(df, column, category, postprocess_dict, params)
    #function to shift a sequential set forward by one or more time steps
    #entries are first coerced to numeric, with missing values receiving adjacent cell
    #infill (forward fill followed by backward fill) prior to the shift
    #rows vacated by the shift are then backward filled
    #if every entry is still NaN at that point (e.g. no numeric entries) the column is filled with 0
    #accepts parameter 'periods' for number of time steps, defaults to 3
    #accepts parameter 'suffix' for column suffix appender, defaults to 'shf3'
    #such as may be useful if applying this transform to the same column more than once
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns the dataframe with the column named column + '_' + suffix
    #and a list of column_dict entries, with driftreport metrics stored in normalization_dict
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters, each falling back to the default for this category
    inplace = params.get('inplace', False)
    periods = params.get('periods', 3)
    suffix = params.get('suffix', 'shf3')
      
    shft_column = column + '_' + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, shft_column, suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, shft_column, suffixoverlap_results)
      
      df.rename(columns = {column : shft_column}, inplace = True)
    
    #convert all values to either numeric or NaN
    df[shft_column] = pd.to_numeric(df[shft_column], errors='coerce')
    
    #apply ffill to replace NArows with value from adjacent cell in preceding row
    #(the ffill / bfill methods are used since fillna(method=...) is deprecated in pandas)
    df[shft_column] = df[shft_column].ffill()
    
    #we'll follow with a bfill just in case first row had a nan
    df[shft_column] = df[shft_column].bfill()
    
    #shift from preceding row
    df[shft_column] = df[shft_column].shift(periods = periods)
    
    #first row(s) will have a nan so just one more backfill
    df[shft_column] = df[shft_column].bfill()
    
    #then one more infill to address scenario when data wasn't numeric
    #after the backfill a nan in the first row means every row is nan
    #checking the whole column avoids a label based lookup of row 0,
    #which raised KeyError when the index had no label 0
    if df[shft_column].isna().all():
      df[shft_column] = df[shft_column].fillna(0)
    
    #create list of columns
    nmbrcolumns = [shft_column]

    #grab some driftreport metrics
    #note that if this function implemented for data streams at scale it may be appropriate
    #to consider creating an alternate without the driftreport metrics for postmunge efficiency
    shifted = df[shft_column]
    rowcount = shifted.shape[0]
    #note positiveratio counts entries >= 0, so zeros are included in it as well as in zeroratio
    positiveratio = int((shifted >= 0).sum()) / rowcount
    negativeratio = int((shifted < 0).sum()) / rowcount
    zeroratio = int((shifted == 0).sum()) / rowcount
    minimum = shifted.min()
    maximum = shifted.max()
    mean = shifted.mean()
    std = shifted.std()

    nmbrnormalization_dict = {shft_column :      {'positiveratio' : positiveratio, \
                                                  'negativeratio' : negativeratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'periods' : periods, \
                                                  'suffix' : suffix}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'shf3', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def _process_MADn(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_MADn(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to normalize data to mean of 0 and mean absolute deviation of 1
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #replaces missing or improperly formatted data with mean of remaining values
    #(mean and mean absolute deviation are both derived from the training data,
    #the mean falls back to 0 when no numeric entries are found,
    #and the mean absolute deviation falls back to 1 when it would otherwise be 0)
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns same dataframes with new column of name column + '_MADn'
    #along with a list of column_dict entries recording mean / MAD / maximum / minimum
    #note this is a "dualprocess" function since is applied to both train and test dataframes
    #expect this approach works better than z-score for when the numerical distribution isn't thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    MADn_column = column + '_MADn'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, MADn_column, suffixoverlap_results)

      mdf_test[MADn_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, MADn_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : MADn_column}, inplace = True)
      mdf_test.rename(columns = {column : MADn_column}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[MADn_column] = pd.to_numeric(mdf_train[MADn_column], errors='coerce')
    mdf_test[MADn_column] = pd.to_numeric(mdf_test[MADn_column], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get maximum value of training column
    maximum = mdf_train[MADn_column].max()
    #get minimum value of training column
    minimum = mdf_train[MADn_column].min()

    #get mean of training data, nan here means there were no numeric entries
    mean = mdf_train[MADn_column].mean() 
    if mean != mean:
      mean = 0

    #replace missing data with training set mean
    mdf_train[MADn_column] = mdf_train[MADn_column].fillna(mean)
    mdf_test[MADn_column] = mdf_test[MADn_column].fillna(mean)

    #subtract mean from column for both train and test
    mdf_train[MADn_column] = mdf_train[MADn_column] - mean
    mdf_test[MADn_column] = mdf_test[MADn_column] - mean

    #get mean absolute deviation of training data
    #computed explicitly since Series.mad() was deprecated in pandas 1.5 and removed in 2.0
    centered = mdf_train[MADn_column]
    MAD = (centered - centered.mean()).abs().mean()
    
    #special case to avoid div by 0
    if MAD == 0:
      MAD = 1

    #divide column values by mad for both training and test data
    mdf_train[MADn_column] = mdf_train[MADn_column] / MAD
    mdf_test[MADn_column] = mdf_test[MADn_column] / MAD

    #create list of columns
    nmbrcolumns = [MADn_column]

    nmbrnormalization_dict = {MADn_column : {'mean' : mean, 'MAD' : MAD, \
                                             'maximum':maximum, 'minimum':minimum}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'MADn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def _process_MAD3(self, mdf_train, mdf_test, column, category, \
                              postprocess_dict, params = {}):
    '''
    #process_MAD3(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #function to normalize data by subtracting maximum and dividing by mean absolute deviation
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #replaces missing or improperly formatted data with mean of remaining values
    #(maximum, mean, and mean absolute deviation are all derived from the training data,
    #the mean falls back to 0 when no numeric entries are found,
    #and the mean absolute deviation falls back to 1 when it would otherwise be 0)
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns same dataframes with new column of name column + '_MAD3'
    #along with a list of column_dict entries recording mean / MAD / datamax / maximum / minimum
    #note this is a "dualprocess" function since is applied to both train and test dataframes
    #expect this approach works better than z-score for when the numerical distribution isn't thin tailed
    #if only have training but not test data handy, use same training data for both dataframe inputs
    #the use of maximum instead of mean for normalization based on comment from RWRI lectures 
    #documented in (anonymized)
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    MAD3_column = column + '_MAD3'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, MAD3_column, suffixoverlap_results)

      mdf_test[MAD3_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, MAD3_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : MAD3_column}, inplace = True)
      mdf_test.rename(columns = {column : MAD3_column}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[MAD3_column] = pd.to_numeric(mdf_train[MAD3_column], errors='coerce')
    mdf_test[MAD3_column] = pd.to_numeric(mdf_test[MAD3_column], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get maximum value of training column
    maximum = mdf_train[MAD3_column].max()
    #get minimum value of training column
    minimum = mdf_train[MAD3_column].min()

    #get mean of training data, nan here means there were no numeric entries
    mean = mdf_train[MAD3_column].mean()
    if mean != mean:
      mean = 0
    
    #replace missing data with training set mean
    mdf_train[MAD3_column] = mdf_train[MAD3_column].fillna(mean)
    mdf_test[MAD3_column] = mdf_test[MAD3_column].fillna(mean)

    #get max of training data (taken after infill, this is the value subtracted below)
    datamax = mdf_train[MAD3_column].max()
    
    #get mean absolute deviation of training data
    #computed explicitly since Series.mad() was deprecated in pandas 1.5 and removed in 2.0
    filled = mdf_train[MAD3_column]
    MAD = (filled - filled.mean()).abs().mean()
    
    #special case to avoid div by 0
    if MAD == 0:
      MAD = 1
    
    #subtract max from column for both train and test
    mdf_train[MAD3_column] = mdf_train[MAD3_column] - datamax
    mdf_test[MAD3_column] = mdf_test[MAD3_column] - datamax

    #divide column values by mad for both training and test data
    mdf_train[MAD3_column] = mdf_train[MAD3_column] / MAD
    mdf_test[MAD3_column] = mdf_test[MAD3_column] / MAD

    #create list of columns
    nmbrcolumns = [MAD3_column]

    nmbrnormalization_dict = {MAD3_column : {'mean' : mean, 'MAD' : MAD, 'datamax' : datamax, \
                                             'maximum':maximum, 'minimum':minimum}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'MAD3', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_mnmx(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mnmx(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #min-max scaling of a numeric column, with the scaling basis (minimum, maximum)
    #taken from the training data and applied to both train and test
    #entries that can't be coerced to numeric are infilled with the training set mean
    #(or with 0 when the training column has no numeric entries)
    #a zero spread between maximum and minimum is replaced by 1 to avoid div by zero
    #accepts parameter 'cap': False for no cap, True to cap at the training maximum,
    #or a specific value (in pre-normalization units), which also replaces the 
    #maximum in the scaling basis when it is below the training maximum
    #accepts parameter 'floor': False for no floor, True to floor at the training minimum,
    #or a specific value (in pre-normalization units), which also replaces the
    #minimum in the scaling basis when it is above the training minimum
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns both dataframes with the column named column + '_mnmx'
    #and a list of column_dict entries with the basis recorded in normalization_dict
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    inplace = params['inplace'] if 'inplace' in params else False
    cap = params['cap'] if 'cap' in params else False
    floor = params['floor'] if 'floor' in params else False
    
    mnmx_column = column + '_mnmx'
    
    if inplace is True:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, mnmx_column, suffixoverlap_results)
      
      #the source column takes on the suffixed header in both sets
      for frame in (mdf_train, mdf_test):
        frame.rename(columns = {column : mnmx_column}, inplace = True)
    
    else:
      
      #the source column is retained and a suffixed copy is populated
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, mnmx_column, suffixoverlap_results)

      mdf_test[mnmx_column] = mdf_test[column].copy()

    #non-numeric entries become NaN
    for frame in (mdf_train, mdf_test):
      frame[mnmx_column] = pd.to_numeric(frame[mnmx_column], errors='coerce')
    
    #training set statistics collected prior to infill, std is only used for driftreport
    std = mdf_train[mnmx_column].std()
    mean = mdf_train[mnmx_column].mean()
    if mean != mean:
      #nan mean, so default infill of 0
      mean = 0

    #infill with the training set mean
    for frame in (mdf_train, mdf_test):
      frame[mnmx_column] = frame[mnmx_column].fillna(mean)
    
    #scaling basis from the training column
    maximum = mdf_train[mnmx_column].max()
    minimum = mdf_train[mnmx_column].min()
    
    #a cap / floor passed as a specific value may tighten the scaling basis
    cap_is_value = not (cap is False or cap is True)
    floor_is_value = not (floor is False or floor is True)
    if cap_is_value and cap < maximum:
      maximum = cap
    if floor_is_value and floor > minimum:
      minimum = floor
    
    #avoid outlier div by zero when max = min
    maxminusmin = maximum - minimum
    if maxminusmin == 0:
      maxminusmin = 1
    
    #perform min-max scaling to train and test sets using values from train
    for frame in (mdf_train, mdf_test):
      frame[mnmx_column] = (frame[mnmx_column] - minimum) / (maxminusmin)

    #cap and floor passed as True resolve to the scaling basis
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
    
    if cap is not False:
      #entries above the cap (expressed in scaled units) are set to the scaled cap
      scaled_cap = (cap - minimum)/maxminusmin
      for frame in (mdf_train, mdf_test):
        frame.loc[frame[mnmx_column] > scaled_cap, mnmx_column] = scaled_cap
    
    if floor is not False:
      #entries below the floor (expressed in scaled units) are set to the scaled floor
      scaled_floor = (floor - minimum)/maxminusmin
      for frame in (mdf_train, mdf_test):
        frame.loc[frame[mnmx_column] < scaled_floor, mnmx_column] = scaled_floor
    
    #create list of columns
    nmbrcolumns = [mnmx_column]

    nmbrnormalization_dict = {mnmx_column : {'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'maxminusmin' : maxminusmin, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'cap' : cap, \
                                             'floor' : floor}}

    #one column_dict entry per returned column
    column_dict_list = [ \
      { nc : {'category' : 'mnmx', \
              'origcategory' : category, \
              'normalization_dict' : nmbrnormalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : nmbrcolumns, \
              'categorylist' : nmbrcolumns, \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'suffixoverlap_results' : suffixoverlap_results, \
              'deletecolumn' : False}} \
      for nc in nmbrcolumns]
        
    return mdf_train, mdf_test, column_dict_list

  def _process_mnm3(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mnm3(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #min-max scaling of a numeric column based on quantiles of the training data
    #entries above the qmax quantile are first replaced with the qmax quantile value
    #and entries below the qmin quantile with the qmin quantile value
    #the result is then scaled as (x - quantilemin) / (quantilemax - quantilemin)
    #accepts parameters 'qmax' (defaults to 0.99) and 'qmin' (defaults to 0.01)
    #entries that can't be coerced to numeric are infilled with the training set mean
    #as evaluated after the quantile replacement (or with 0 when that mean is nan)
    #a zero spread between the quantiles is replaced by 1 to avoid div by zero
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns both dataframes with the column named column + '_mnm3'
    #and a list of column_dict entries with the basis recorded in normalization_dict
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    inplace = params['inplace'] if 'inplace' in params else False
    qmax = params['qmax'] if 'qmax' in params else 0.99
    qmin = params['qmin'] if 'qmin' in params else 0.01
    
    mnm3_column = column + '_mnm3'
    
    if inplace is True:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, mnm3_column, suffixoverlap_results)
      
      #the source column takes on the suffixed header in both sets
      for frame in (mdf_train, mdf_test):
        frame.rename(columns = {column : mnm3_column}, inplace = True)
    
    else:
      
      #the source column is retained and a suffixed copy is populated
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, mnm3_column, suffixoverlap_results)

      mdf_test[mnm3_column] = mdf_test[column].copy()

    #non-numeric entries become NaN
    for frame in (mdf_train, mdf_test):
      frame[mnm3_column] = pd.to_numeric(frame[mnm3_column], errors='coerce')
    
    #standard deviation of training data, only used for driftreport
    std = mdf_train[mnm3_column].std()

    #upper and lower quantile values of the training column, nan defaults to 0
    quantilemax = mdf_train[mnm3_column].quantile(qmax)
    quantilemax = 0 if quantilemax != quantilemax else quantilemax

    quantilemin = mdf_train[mnm3_column].quantile(qmin)
    quantilemin = 0 if quantilemin != quantilemin else quantilemin

    #entries beyond the upper quantile value are set to it
    for frame in (mdf_train, mdf_test):
      frame.loc[frame[mnm3_column] > quantilemax, mnm3_column] = quantilemax

    #entries beyond the lower quantile value are set to it
    for frame in (mdf_train, mdf_test):
      frame.loc[frame[mnm3_column] < quantilemin, mnm3_column] = quantilemin

    #the infill mean is deliberately evaluated after the quantile replacement
    mean = mdf_train[mnm3_column].mean()    
    if mean != mean:
      mean = 0
    
    #infill with the training set mean
    for frame in (mdf_train, mdf_test):
      frame[mnm3_column] = frame[mnm3_column].fillna(mean)
    
    #avoid outlier div by zero when max = min
    maxminusmin = quantilemax - quantilemin
    if maxminusmin == 0:
      maxminusmin = 1

    #perform min-max scaling to train and test sets using values from train
    for frame in (mdf_train, mdf_test):
      frame[mnm3_column] = (frame[mnm3_column] - quantilemin) / (maxminusmin)
    
    #create list of columns
    nmbrcolumns = [mnm3_column]

    nmbrnormalization_dict = {mnm3_column : {'quantilemin' : quantilemin, \
                                             'quantilemax' : quantilemax, \
                                             'maxminusmin' : maxminusmin, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'qmax' : qmax, \
                                             'qmin' : qmin}}

    #one column_dict entry per returned column
    column_dict_list = [ \
      { nc : {'category' : 'mnm3', \
              'origcategory' : category, \
              'normalization_dict' : nmbrnormalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : nmbrcolumns, \
              'categorylist' : nmbrcolumns, \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'suffixoverlap_results' : suffixoverlap_results, \
              'deletecolumn' : False}} \
      for nc in nmbrcolumns]

    return mdf_train, mdf_test, column_dict_list

  def _process_mxab(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mxab(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #max absolute scaling of a numeric column, dividing train and test entries
    #by the largest absolute value found in the training data
    #such that training entries are returned in the range -1 to 1
    #entries that can't be coerced to numeric are infilled with the training set mean
    #(or with 0 when the training column has no numeric entries)
    #a max absolute value of 0 is replaced by 1 to avoid div by zero
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns both dataframes with the column named column + '_mxab'
    #and a list of column_dict entries with the basis recorded in normalization_dict
    '''
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    mxab_column = column + '_mxab'
    
    if inplace is True:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, mxab_column, suffixoverlap_results)
      
      #the source column takes on the suffixed header in both sets
      for frame in (mdf_train, mdf_test):
        frame.rename(columns = {column : mxab_column}, inplace = True)
    
    else:
      
      #the source column is retained and a suffixed copy is populated
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, mxab_column, suffixoverlap_results)

      mdf_test[mxab_column] = mdf_test[column].copy()

    #non-numeric entries become NaN
    for frame in (mdf_train, mdf_test):
      frame[mxab_column] = pd.to_numeric(frame[mxab_column], errors='coerce')
    
    #training set statistics collected prior to infill, std is only used for driftreport
    std = mdf_train[mxab_column].std()
    mean = mdf_train[mxab_column].mean()   
    if mean != mean:
      #nan mean, so default infill of 0
      mean = 0

    #infill with the training set mean
    for frame in (mdf_train, mdf_test):
      frame[mxab_column] = frame[mxab_column].fillna(mean)
    
    #extremes of the training column after infill
    maximum = mdf_train[mxab_column].max()
    minimum = mdf_train[mxab_column].min()
    
    #the divisor is whichever extreme has the larger magnitude
    maxabs = max(abs(maximum), abs(minimum))
    
    #avoid outlier div by zero when the column is all zeros
    if maxabs == 0:
      maxabs = 1
    
    #perform maxabs scaling to train and test sets using values from train
    for frame in (mdf_train, mdf_test):
      frame[mxab_column] = frame[mxab_column] / (maxabs)
    
    #create list of columns
    nmbrcolumns = [mxab_column]

    nmbrnormalization_dict = {mxab_column : {'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'maxabs' : maxabs, \
                                             'mean' : mean, \
                                             'std' : std}}

    #one column_dict entry per returned column
    column_dict_list = [ \
      { nc : {'category' : 'mxab', \
              'origcategory' : category, \
              'normalization_dict' : nmbrnormalization_dict, \
              'origcolumn' : column, \
              'inputcolumn' : column, \
              'columnslist' : nmbrcolumns, \
              'categorylist' : nmbrcolumns, \
              'infillmodel' : False, \
              'infillcomplete' : False, \
              'suffixoverlap_results' : suffixoverlap_results, \
              'deletecolumn' : False}} \
      for nc in nmbrcolumns]
        
    return mdf_train, mdf_test, column_dict_list
  
  def _process_retn(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #process_retn(mdf_train, mdf_test, column, category)
    #function to scale data as follows:
    
    # if max >= 0 and min <= 0:
    #   #scaling based on 
    #   x = x / (max - min)

    # elif max >= 0 and min >= 0:
    #   #traditional min/max
    #   x = (x - min) / (max - min)
    
    # elif max <= 0 and min <= 0:
    #   #max/min (retains negative values)
    #   x = (x - max) / (max - min)
    
    #replaces missing or improperly formatted data with mean of remaining values
    
    #returns same dataframes with new column of name column + '_retn'
    #note this is a "dualprocess" function since is applied to both dataframes
    
    #note with parameters divisor can also be set as standard deviation
    #also aprameters accepted for cap/floor/mulitplier/offset
    #where cap/floor based on pretransform values
    #multiplier/offset based on posttransform values, muoltiplier applied betfore offset
    """
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    
    #accepts divisor parameters of 'minmax' or 'std', eg divisor for normalization equation
    #note that standard deviation doesn't have same properties for sign retention when all values > or < 0
    if 'divisor' in params:
      divisor = params['divisor']
    else:
      divisor = 'minmax'
    
    #offset is just an added constant applied after multiplier
    if 'offset' in params:
      offset = params['offset']
    else:
      offset = 0
    
    #multiplier scales the set by multiplication prior to offset
    if 'multiplier' in params:
      multiplier = params['multiplier']
    else:
      multiplier = 1
    
    #cap can be passed as True for max of training data or as a specific value prior to normalization, False for no cap
    if 'cap' in params:
      cap = params['cap']
    else:
      cap = False
    
    #floor can be passed as True for min of training data or as a specific value prior to normalization, False for no floor
    if 'floor' in params:
      floor = params['floor']
    else:
      floor = False
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_retn', suffixoverlap_results)

      mdf_test[column + '_retn'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_retn', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_retn'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_retn'}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[column + '_retn'] = pd.to_numeric(mdf_train[column + '_retn'], errors='coerce')
    mdf_test[column + '_retn'] = pd.to_numeric(mdf_test[column + '_retn'], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[column + '_retn'].std()
    
    mad = mdf_train[column + '_retn'].mad()
    
    #get maximum value of training column
    maximum = mdf_train[column + '_retn'].max()
    
    #get minimum value of training column
    minimum = mdf_train[column + '_retn'].min()
    
    #avoid outlier div by zero when max = min
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
      
    if std != std or std == 0:
      std = 1
      
    if mad != mad or mad == 0:
      mad = 1
      
    #if cap < maximum, maximum = cap
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor application
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values in test > cap with cap
      mdf_train.loc[mdf_train[column + '_retn'] > cap, (column + '_retn')] \
      = cap
      
      mdf_test.loc[mdf_test[column + '_retn'] > cap, (column + '_retn')] \
      = cap
    
    if floor is not False:
      #replace values in test < floor with floor
      mdf_train.loc[mdf_train[column + '_retn'] < floor, (column + '_retn')] \
      = floor
      
      mdf_test.loc[mdf_test[column + '_retn'] < floor, (column + '_retn')] \
      = floor
      
    #get mean of training data
    mean = mdf_train[column + '_retn'].mean()
    if mean != mean:
      mean = 0

    #replace missing data with training set mean
    mdf_train[column + '_retn'] = mdf_train[column + '_retn'].fillna(mean)
    mdf_test[column + '_retn'] = mdf_test[column + '_retn'].fillna(mean)
    
    #edge case (only neccesary so scalingapproach is assigned)
    if maximum != maximum:
      maximum = 0
    if minimum != minimum:
      minimum = 0
    
    #divisor
    if divisor not in {'minmax', 'std', 'mad'}:
      print("Error: retn transform parameter 'divisor' only accepts entries of 'minmax' 'mad' or 'std'")
    if divisor == 'minmax':
      divisor = maxminusmin
    elif divisor == 'mad':
      divisor = mad
    else:
      divisor = std
      
    if divisor == 0 or divisor != divisor:
      divisor = 1
    
    #driftreport metric scalingapproach returned as 'retn' or 'mnmx' or 'mxmn'
    #where mnmx is for cases where all values in train set are positive
    #mxmn is for cases where all values in train set are negative
    
    if maximum >= 0 and minimum <= 0:
      
      mdf_train[column + '_retn'] = (mdf_train[column + '_retn']) / \
                                    (divisor) * multiplier + offset
      
      mdf_test[column + '_retn'] = (mdf_test[column + '_retn']) / \
                                    (divisor) * multiplier + offset
      
      scalingapproach = 'retn'
      
    elif maximum >= 0 and minimum >= 0:
    
      #perform min-max scaling to train and test sets using values from train
      mdf_train[column + '_retn'] = (mdf_train[column + '_retn'] - minimum) / \
                                    (divisor) * multiplier + offset

      mdf_test[column + '_retn'] = (mdf_test[column + '_retn'] - minimum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mnmx'
      
    elif maximum <= 0 and minimum <= 0:
    
      #perform min-max scaling to train and test sets using values from train
      mdf_train[column + '_retn'] = (mdf_train[column + '_retn'] - maximum) / \
                                    (divisor) * multiplier + offset

      mdf_test[column + '_retn'] = (mdf_test[column + '_retn'] - maximum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mxmn'
    
    #create list of columns
    nmbrcolumns = [column + '_retn']

    nmbrnormalization_dict = {column + '_retn' : {'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'mad' : mad, \
                                                  'scalingapproach' : scalingapproach, \
                                                  'offset' : offset, \
                                                  'multiplier': multiplier, \
                                                  'cap' : cap, \
                                                  'floor' : floor, \
                                                  'divisor' : divisor}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'retn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_mean(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_mean(mdf_train, mdf_test, column, category)
    #function to scale data to minimum of 0 and maximum of 1 \
    #based on min/max values from training set for this column
    #takes as arguement pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #replaces missing or improperly formatted data with mean of remaining values
    #returns same dataframes with new column of name column + '_mnmx'
    #note this is a "dualprocess" function since is applied to both dataframes
    #expect this approach works better when the numerical distribution is thin tailed
    #if only have training but not test data handy, use same training data for both
    #dataframe inputs
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    #initialize parameters
    if 'offset' in params:
      offset = params['offset']
    else:
      offset = 0
      
    if 'multiplier' in params:
      multiplier = params['multiplier']
    else:
      multiplier = 1
    
    if 'cap' in params:
      cap = params['cap']
    else:
      cap = False
      
    if 'floor' in params:
      floor = params['floor']
    else:
      floor = False
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_mean', suffixoverlap_results)

      mdf_test[column + '_mean'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_mean', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_mean'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_mean'}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[column + '_mean'] = pd.to_numeric(mdf_train[column + '_mean'], errors='coerce')
    mdf_test[column + '_mean'] = pd.to_numeric(mdf_test[column + '_mean'], errors='coerce')
    
    #get maximum value of training column
    maximum = mdf_train[column + '_mean'].max()
    
    #get minimum value of training column
    minimum = mdf_train[column + '_mean'].min()
    
    #if cap < maximum, maximum = cap
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor application
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values in test > cap with cap
      mdf_train.loc[mdf_train[column + '_mean'] > cap, (column + '_mean')] \
      = cap
      
      mdf_test.loc[mdf_test[column + '_mean'] > cap, (column + '_mean')] \
      = cap
    
    if floor is not False:
      #replace values in test < floor with floor
      mdf_train.loc[mdf_train[column + '_mean'] < floor, (column + '_mean')] \
      = floor
      
      mdf_test.loc[mdf_test[column + '_mean'] < floor, (column + '_mean')] \
      = floor
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[column + '_mean'].std()

    #get mean of training data
    mean = mdf_train[column + '_mean'].mean()
    if mean != mean:
      mean = 0
      
    #replace missing data with training set mean
    mdf_train[column + '_mean'] = mdf_train[column + '_mean'].fillna(mean)
    mdf_test[column + '_mean'] = mdf_test[column + '_mean'].fillna(mean)
    
    #avoid outlier div by zero when max = min
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
    
    #perform min-max scaling to train and test sets using values from train
    mdf_train[column + '_mean'] = (mdf_train[column + '_mean'] - mean) / \
                                  (maxminusmin) * multiplier + offset
    
    mdf_test[column + '_mean'] = (mdf_test[column + '_mean'] - mean) / \
                                 (maxminusmin) * multiplier + offset

#     #change data type for memory savings
#     mdf_train[column + '_mnmx'] = mdf_train[column + '_mnmx'].astype(np.float32)
#     mdf_test[column + '_mnmx'] = mdf_test[column + '_mnmx'].astype(np.float32)
    
    #create list of columns
    nmbrcolumns = [column + '_mean']

    nmbrnormalization_dict = {column + '_mean' : {'minimum' : minimum, \
                                                  'maximum' : maximum, \
                                                  'maxminusmin' : maxminusmin, \
                                                  'mean' : mean, \
                                                  'std' : std, \
                                                  'offset' : offset, \
                                                  'multiplier': multiplier, \
                                                  'cap' : cap, \
                                                  'floor' : floor}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'mean', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_binary(self, mdf_train, mdf_test, column, category, \
                           postprocess_dict, params = {}):
    '''
    #process_binary(mdf, column, missing)
    #converts binary classification values to 0 or 1
    #takes as arguement a pandas dataframe (mdf_train, mdf_test), \
    #the name of the column string ('column') \
    #and the category from parent columkn (category)
    #fills missing valules with most common value
    #returns same dataframes with new column of name column + '_bnry'
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_bnry', suffixoverlap_results)

      mdf_test[column + '_bnry'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_bnry', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_bnry'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_bnry'}, inplace = True)
    
    if str_convert is True:
      mdf_train[column + '_bnry'] = mdf_train[column + '_bnry'].astype(str)
      mdf_test[column + '_bnry'] = mdf_test[column + '_bnry'].astype(str)

    #create plug value for missing cells as most common value
    valuecounts = pd.DataFrame(mdf_train[column + '_bnry'].value_counts())
    valuecounts = valuecounts.rename_axis('zzzinfill').sort_values(by = [column + '_bnry', 'zzzinfill'], ascending = [False, True])
    valuecounts = list(valuecounts.index)
    
    if len(valuecounts) > 0:

      if len(valuecounts) > 1:
        binary_missing_plug = valuecounts[0]
      else:
        #making an executive decision here to deviate from standardinfill of most common value
        #for this edge case where a column evaluated as binary has only single value and NaN's
        binary_missing_plug = 'zzzinfill'

      #test for nan
      if binary_missing_plug != binary_missing_plug:
        binary_missing_plug = valuecounts[1]

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      extravalues = []
      if len(valuecounts) > 2:
        i=0
        for value in valuecounts:
          if i>1:
            extravalues.append(value)
          i+=1

      #replace nan in valuecounts with binary_missing_plug so we can sort
      valuecounts = [x if x == x else binary_missing_plug for x in valuecounts]
  #     #convert everything to string for sort
  #     valuecounts = [str(x) for x in valuecounts]

      #note LabelBinarizer encodes alphabetically, with 1 assigned to first and 0 to second
      #we'll take different approach of going by most common value to 1 unless 0 or 1
      #are already in the set then we'll defer to keeping those designations in place
      #there's some added complexity here to deal with edge case of passing this function
      #to a set with >2 values as we might run into when caluclating drift in postmunge

  #     valuecounts.sort()
  #     valuecounts = sorted(valuecounts)
      #in case this includes both strings and integers for instance we'll sort this way
  #     valuecounts = sorted(valuecounts, key=lambda p: str(p))

      #we'll save these in the normalization dictionary for future reference
      onevalue = valuecounts[0]
      if len(valuecounts) > 1:
        zerovalue = valuecounts[1]
      else:
        zerovalue = 'zzzinfill'

      #special case for when the source column is already encoded as 0/1

      if len(valuecounts) <= 2:

        if 0 in valuecounts:
          zerovalue = 0
          if 1 in valuecounts:
            onevalue = 1
          else:
            if valuecounts[0] == 0:
              if len(valuecounts) > 1:
                onevalue = valuecounts[1]
              else:
                onevalue = 'zzzinfill'

        if 1 in valuecounts:
          if 0 not in valuecounts:
            if valuecounts[0] != 1:
              onevalue = 1
              zerovalue = valuecounts[0]

      #edge case same as above but when values of 0 or 1. are in set and 
      #len(valuecounts) > 2
      if len(valuecounts) > 2:
        valuecounts2 = valuecounts[:2]

        if 0 in valuecounts2:
          zerovalue = 0
          if 1 in valuecounts2:
            onevalue = 1
          else:
            if valuecounts2[0] == 0:
              if len(valuecounts) > 1:
                onevalue = valuecounts2[1]
              else:
                onevalue = 'zzzinfill'

        if 1 in valuecounts2:
          if 0 not in valuecounts2:
            if valuecounts2[0] != 1:
              onevalue = 1
              zerovalue = valuecounts2[0]

      #edge case that might come up in drift report
      if binary_missing_plug not in {onevalue, zerovalue}:
        binary_missing_plug = onevalue

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      if len(valuecounts) > 2:
        for value in extravalues:
          mdf_train[column + '_bnry'] = \
          np.where(mdf_train[column + '_bnry'] == value, binary_missing_plug, mdf_train[column + '_bnry'])
          mdf_test[column + '_bnry'] = \
          np.where(mdf_test[column + '_bnry'] == value, binary_missing_plug, mdf_test[column + '_bnry'])

      #replace missing data with specified classification
      mdf_train[column + '_bnry'] = mdf_train[column + '_bnry'].fillna(binary_missing_plug)
      mdf_test[column + '_bnry'] = mdf_test[column + '_bnry'].fillna(binary_missing_plug)

      #this addressess issue where nunique for mdftest > than that for mdf_train
      #note is currently an oportunity for improvement that NArows won't identify these poinsts as candiadates
      #for user specified infill, and as currently addressed will default to infill with most common value
      #in the mean time a workaround could be for user to manually replace extra values with nan prior to
      #postmunge application such as if they want to apply ML infill
      #this will only be an issue when nunique for df_train == 2, and nunique for df_test > 2
      #if len(mdf_test[column + '_bnry'].unique()) > 2:
      uniqueintest = mdf_test[column + '_bnry'].unique()
      for unique in uniqueintest:
        if unique not in {onevalue, zerovalue}:
          mdf_test[column + '_bnry'] = \
          np.where(mdf_test[column + '_bnry'] == unique, binary_missing_plug, mdf_test[column + '_bnry'])

      #convert column to binary 0/1 classification (replaces scikit LabelBinarizer)
      mdf_train[column + '_bnry'] = np.where(mdf_train[column + '_bnry'] == onevalue, 1, 0)
      mdf_test[column + '_bnry'] = np.where(mdf_test[column + '_bnry'] == onevalue, 1, 0)

      #create list of columns
      bnrycolumns = [column + '_bnry']

      #change data types to 8-bit (1 byte) integers for memory savings
      mdf_train[column + '_bnry'] = mdf_train[column + '_bnry'].astype(np.int8)
      mdf_test[column + '_bnry'] = mdf_test[column + '_bnry'].astype(np.int8)

      #a few more metrics collected for driftreport
      oneratio = mdf_train[column + '_bnry'].sum() / mdf_train[column + '_bnry'].shape[0]
      zeroratio = (mdf_train[column + '_bnry'].shape[0] - mdf_train[column + '_bnry'].sum() )\
                  / mdf_train[column + '_bnry'].shape[0]

      #create list of columns associated with categorical transform (blank for now)
      categorylist = []
    
    else:
      mdf_train[column + '_bnry'] = 0
      mdf_test[column + '_bnry'] = 0
      
      binary_missing_plug = 0
      onevalue = 1
      zerovalue = 0
      extravalues = 0
      oneratio = 0
      zeroratio = 0
      bnrycolumns = [column + '_bnry']

  #     bnrynormalization_dict = {column + '_bnry' : {'missing' : binary_missing_plug, \
  #                                                   'onevalue' : onevalue, \
  #                                                   'zerovalue' : zerovalue}}
    
    bnrynormalization_dict = {column + '_bnry' : {'missing' : binary_missing_plug, \
                                                  1 : onevalue, \
                                                  0 : zerovalue, \
                                                  'extravalues' : extravalues, \
                                                  'oneratio' : oneratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'str_convert' : str_convert}}

    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []

    for bc in bnrycolumns:

      column_dict = { bc : {'category' : 'bnry', \
                           'origcategory' : category, \
                           'normalization_dict' : bnrynormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : bnrycolumns, \
                           'categorylist' : bnrycolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list

  def _process_binary2(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_binary2(mdf, column, missing)
    #converts binary classification values to 0 or 1
    #takes as arguement a pandas dataframe (mdf_train, mdf_test), \
    #the name of the column string ('column') \
    #and the category from parent columkn (category)
    #fills missing valules with least common value (different than bnry)
    #returns same dataframes with new column of name column + '_bnry'
    #note this is a "dualprocess" function since is applied to both dataframes
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_bnr2', suffixoverlap_results)

      mdf_test[column + '_bnr2'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_bnr2', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_bnr2'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_bnr2'}, inplace = True)
    
    if str_convert is True:
      mdf_train[column + '_bnr2'] = mdf_train[column + '_bnr2'].astype(str)
      mdf_test[column + '_bnr2'] = mdf_test[column + '_bnr2'].astype(str)

    #create plug value for missing cells as most common value
    valuecounts = pd.DataFrame(mdf_train[column + '_bnr2'].value_counts())
    valuecounts = valuecounts.rename_axis('zzzinfill').sort_values(by = [column + '_bnr2', 'zzzinfill'], ascending = [False, True])
    valuecounts = list(valuecounts.index)
    
    if len(valuecounts) > 0:

      if len(valuecounts) > 1:
        #binary_missing_plug = valuecounts[0]
        binary_missing_plug = valuecounts[1]
      else:
        #making an executive decision here to deviate from standardinfill of most common value
        #for this edge case where a column evaluated as binary has only single value and NaN's
        binary_missing_plug = 'zzzinfill'

      #test for nan
      if binary_missing_plug != binary_missing_plug:
        #binary_missing_plug = valuecounts[1]
        binary_missing_plug = valuecounts[0]

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      extravalues = []
      if len(valuecounts) > 2:
        i=0
        for value in valuecounts:
          if i>1:
            extravalues.append(value)
          i+=1

      #replace nan in valuecounts with binary_missing_plug so we can sort
      valuecounts = [x if x == x else binary_missing_plug for x in valuecounts]
  #     #convert everything to string for sort
  #     valuecounts = [str(x) for x in valuecounts]

      #note LabelBinarizer encodes alphabetically, with 1 assigned to first and 0 to second
      #we'll take different approach of going by most common value to 1 unless 0 or 1
      #are already in the set then we'll defer to keeping those designations in place
      #there's some added complexity here to deal with edge case of passing this function
      #to a set with >2 values as we might run into when caluclating drift in postmunge

  #     valuecounts.sort()
  #     valuecounts = sorted(valuecounts)
      #in case this includes both strings and integers for instance we'll sort this way
  #     valuecounts = sorted(valuecounts, key=lambda p: str(p))

      #we'll save these in the normalization dictionary for future reference
      onevalue = valuecounts[0]
      if len(valuecounts) > 1:
        zerovalue = valuecounts[1]
      else:
        zerovalue = 'zzzinfill'

      #special case for when the source column is already encoded as 0/1

      if len(valuecounts) <= 2:

        if 0 in valuecounts:
          zerovalue = 0
          if 1 in valuecounts:
            onevalue = 1
          else:
            if valuecounts[0] == 0:
              if len(valuecounts) > 1:
                onevalue = valuecounts[1]
              else:
                onevalue = 'zzzinfill'

        if 1 in valuecounts:
          if 0 not in valuecounts:
            if valuecounts[0] != 1:
              onevalue = 1
              zerovalue = valuecounts[0]

      #edge case same as above but when values of 0 or 1. are in set and 
      #len(valuecounts) > 2
      if len(valuecounts) > 2:
        valuecounts2 = valuecounts[:2]

        if 0 in valuecounts2:
          zerovalue = 0
          if 1 in valuecounts2:
            onevalue = 1
          else:
            if valuecounts2[0] == 0:
              if len(valuecounts) > 1:
                onevalue = valuecounts2[1]
              else:
                onevalue = 'zzzinfill'

        if 1 in valuecounts2:
          if 0 not in valuecounts2:
            if valuecounts2[0] != 1:
              onevalue = 1
              zerovalue = valuecounts2[0]

      #edge case that might come up in drift report
      if binary_missing_plug not in {onevalue, zerovalue}:
        #binary_missing_plug = onevalue
        binary_missing_plug = zerovalue

      #edge case when applying this transform to set with >2 values
      #this only comes up when caluclating driftreport in postmunge
      if len(valuecounts) > 2:
        for value in extravalues:
          mdf_train[column + '_bnr2'] = \
          np.where(mdf_train[column + '_bnr2'] == value, binary_missing_plug, mdf_train[column + '_bnr2'])
          mdf_test[column + '_bnr2'] = \
          np.where(mdf_test[column + '_bnr2'] == value, binary_missing_plug, mdf_test[column + '_bnr2'])

      #replace missing data with specified classification
      mdf_train[column + '_bnr2'] = mdf_train[column + '_bnr2'].fillna(binary_missing_plug)
      mdf_test[column + '_bnr2'] = mdf_test[column + '_bnr2'].fillna(binary_missing_plug)

      #this addresses the issue where nunique for mdf_test > than that for mdf_train
      #note there is currently an opportunity for improvement in that NArows won't identify these points as candidates
      #for user specified infill, and as currently addressed will default to infill with most common value
      #in the meantime a workaround could be for user to manually replace extra values with nan prior to
      #postmunge application such as if they want to apply ML infill
      #this will only be an issue when nunique for df_train == 2, and nunique for df_test > 2
      #if len(mdf_test[column + '_bnry'].unique()) > 2:
      uniqueintest = mdf_test[column + '_bnr2'].unique()
      for unique in uniqueintest:
        if unique not in {onevalue, zerovalue}:
          mdf_test[column + '_bnr2'] = \
          np.where(mdf_test[column + '_bnr2'] == unique, binary_missing_plug, mdf_test[column + '_bnr2'])

      #convert column to binary 0/1 classification (replaces scikit LabelBinarizer)
      mdf_train[column + '_bnr2'] = np.where(mdf_train[column + '_bnr2'] == onevalue, 1, 0)
      mdf_test[column + '_bnr2'] = np.where(mdf_test[column + '_bnr2'] == onevalue, 1, 0)

      #create list of columns
      bnrycolumns = [column + '_bnr2']

      #change data types to 8-bit (1 byte) integers for memory savings
      mdf_train[column + '_bnr2'] = mdf_train[column + '_bnr2'].astype(np.int8)
      mdf_test[column + '_bnr2'] = mdf_test[column + '_bnr2'].astype(np.int8)

      #a few more metrics collected for driftreport
      oneratio = mdf_train[column + '_bnr2'].sum() / mdf_train[column + '_bnr2'].shape[0]
      zeroratio = (mdf_train[column + '_bnr2'].shape[0] - mdf_train[column + '_bnr2'].sum() )\
                  / mdf_train[column + '_bnr2'].shape[0]

      #create list of columns associated with categorical transform (blank for now)
      categorylist = []
    
    else:
      mdf_train[column + '_bnr2'] = 0
      mdf_test[column + '_bnr2'] = 0
      
      binary_missing_plug = 0
      onevalue = 1
      zerovalue = 0
      extravalues = 0
      oneratio = 0
      zeroratio = 0
      bnrycolumns = [column + '_bnr2']

  #     bnrynormalization_dict = {column + '_bnry' : {'missing' : binary_missing_plug, \
  #                                                   'onevalue' : onevalue, \
  #                                                   'zerovalue' : zerovalue}}
    
    bnrynormalization_dict = {column + '_bnr2' : {'missing' : binary_missing_plug, \
                                                  1 : onevalue, \
                                                  0 : zerovalue, \
                                                  'extravalues' : extravalues, \
                                                  'oneratio' : oneratio, \
                                                  'zeroratio' : zeroratio, \
                                                  'str_convert' : str_convert}}

    #store some values in the column_dict{} for use later in ML infill methods
    column_dict_list = []

    for bc in bnrycolumns:

      column_dict = { bc : {'category' : 'bnr2', \
                           'origcategory' : category, \
                           'normalization_dict' : bnrynormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : bnrycolumns, \
                           'categorylist' : bnrycolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    #return mdf, bnrycolumns, categorylist, column_dict_list
    return mdf_train, mdf_test, column_dict_list
  
  def _process_onht(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_onht(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #preprocess column with one hot encoding fit to the unique entries of mdf_train
    #same as 'text' transform except returned columns are labeled with an integer
    #suffix instead of an entry suffix, ie column + '_onht_' + str(i), where i is the
    #position of the entry among the train entries sorted by their string representation
    #
    #accepts params entry 'str_convert' (defaults to False)
    #when True entries are converted to strings before encoding, eg 2 == '2'
    #otherwise entries are encoded as objects, eg 2 != '2'
    #
    #missing entries are replaced with the plug value 'zzzinfill' before encoding
    #entries of mdf_test that are not found in mdf_train receive no activation
    #
    #returns mdf_train and mdf_test with the int8 encoded columns concatenated
    #(the received column is left in place) and column_dict_list with one
    #column_dict per returned column
    #category is recorded as origcategory, postprocess_dict is not inspected here
    '''
    
    suffixoverlap_results = {}
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    str_convert = params.get('str_convert', False)
    
    #temporary support column, it is removed again before returning
    tempcolumn = column + '_onht_'
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, tempcolumn, suffixoverlap_results)
    
    #the encoding is derived from a copy so that the received column is preserved
    mdf_train[tempcolumn] = mdf_train[column].copy()
    mdf_test[tempcolumn] = mdf_test[column].copy()

    #convert to category so that the plug value can be registered as a valid entry
    mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('category')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('category')

    if 'zzzinfill' not in mdf_train[tempcolumn].cat.categories:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[tempcolumn].cat.categories:
      mdf_test[tempcolumn] = mdf_test[tempcolumn].cat.add_categories(['zzzinfill'])

    #replace missing entries with the plug value
    mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna('zzzinfill')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna('zzzinfill')

    if str_convert is True:
      #replace numerical with string equivalent
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype(str)
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype(str)
    else:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('object')
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('object')

    #unique train entries, sorted by string representation so that
    #sets with mixed numeric and string entries can be ordered
    train_entries = sorted(mdf_train[tempcolumn].unique(), key=str)
    orig_labels_train = list(train_entries)

    #pandas one hot encoding doesn't sort integers and strings properly so using my own
    #note the test set is encoded against the train entries, which is why
    #entries only found in the test set receive no activation
    df_train_cat = pd.DataFrame(mdf_train[tempcolumn])
    df_test_cat = pd.DataFrame(mdf_test[tempcolumn])
    for entry in train_entries:
      df_train_cat[entry] = np.where(mdf_train[tempcolumn] == entry, 1, 0)
      df_test_cat[entry] = np.where(mdf_test[tempcolumn] == entry, 1, 0)
    del df_train_cat[tempcolumn]
    del df_test_cat[tempcolumn]
    
    #map each entry to the integer suffix header it will be returned with
    labels_dict = {entry : column + '_onht_' + str(i) for i, entry in enumerate(train_entries)}
    
    #reset the headers from the list of entries
    df_train_cat.columns = train_entries
    df_test_cat.columns = train_entries
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)

    #concatenate the encoded set with the rest of the data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)

    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    #delete the support column for the plug value when present
    #NOTE(review): the encoded headers at this point are the raw entries, so a plug
    #column would be headed 'zzzinfill' and is not matched by column + '_zzzinfill',
    #confirm whether the plug column was meant to be dropped here as in the text transform
    columnNAr2 = column + '_zzzinfill'
    if columnNAr2 in mdf_train.columns:
      del mdf_train[columnNAr2]
    if columnNAr2 in mdf_test.columns:
      del mdf_test[columnNAr2]
    if 'zzzinfill' in orig_labels_train:
      orig_labels_train.remove('zzzinfill')
    
    #list of the encoded headers, at this point still named after the entries
    textcolumns = list(df_train_cat)
    if columnNAr2 in textcolumns:
      textcolumns.remove(columnNAr2)
    
    #dictionary pairing the sorted headers with the train entries (excluding the plug value)
    textlabelsdict = dict(zip(sorted(textcolumns, key=str), orig_labels_train))
    
    #change data types to 8-bit (1 byte) integers for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)

    #the entry based headers are stored in the normalization_dict as text_categorylist
    categorylist = textcolumns.copy()

    #now convert column headers from text convention to onht convention
    mdf_train = mdf_train.rename(columns=labels_dict)
    mdf_test  = mdf_test.rename(columns=labels_dict)
    
    textcolumns = [labels_dict[entry] for entry in textcolumns]
    
    inverse_labels_dict = {value:key for key,value in labels_dict.items()}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for tc in textcolumns:
    
      #ratio of activations in the train set, collected for driftreport
      tc_ratio = tc + '_ratio'
      tcratio = mdf_train[tc].sum() / mdf_train[tc].shape[0]

      textnormalization_dict = {tc : {'textlabelsdict_onht' : textlabelsdict, \
                                      tc_ratio : tcratio, \
                                      'labels_dict' : labels_dict, \
                                      'inverse_labels_dict' : inverse_labels_dict, \
                                      'text_categorylist' : categorylist, \
                                      'str_convert' : str_convert}}
      
      column_dict = {tc : {'category' : 'onht', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : textcolumns, \
                           'categorylist' : textcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict)
    
    return mdf_train, mdf_test, column_dict_list

  def _process_text(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_text(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #one hot encodes a column of categoric entries
    #takes as arguments two pandas dataframes containing training and test data
    #(mdf_train, mdf_test), the header of the target column (column)
    #and the category that is recorded as origcategory (category)
    #both sets are processed in the same call so that the test set can be
    #conformed to the encoding space derived from the training set
    #
    #entries are converted to strings and missing entries receive no activation
    #returned columns are named column + '_' + entry, with dtype int8
    #categories missing from the training set are removed from the test set
    #categories present in training but missing from the test set are given
    #a column of zeros, and the order of new columns is consistent between both sets
    #the received column is left in place in both dataframes
    #
    #returns the two transformed dataframes (mdf_train, mdf_test)
    #and column_dict_list with one column_dict per returned column
    #postprocess_dict and params are not inspected here
    '''
    
    suffixoverlap_results = {}
    
    #temporary support column, named with the first unique train entry as suffix
    tempcolumn = column + '_' + str(mdf_train[column].unique()[0])
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, tempcolumn, suffixoverlap_results)
    
    for df in (mdf_train, mdf_test):
      
      #work from a copy so that the received column is preserved
      #conversion to category allows registering the plug value as a valid entry
      entries = df[column].copy().astype('category')
      
      if 'zzzinfill' not in entries.cat.categories:
        entries = entries.cat.add_categories(['zzzinfill'])
      
      #missing entries receive the plug value, then everything is cast to string
      df[tempcolumn] = entries.fillna('zzzinfill').astype(str)

    #sorted unique entries of each set
    train_entries = sorted(mdf_train[tempcolumn].unique())
    test_entries = sorted(mdf_test[tempcolumn].unique())
    orig_labels_train = list(train_entries)

    #pandas one hot encoder
    df_train_cat = pd.get_dummies(mdf_train[tempcolumn])
    df_test_cat = pd.get_dummies(mdf_test[tempcolumn])

    #headers are the received column header with the entry appended
    df_train_cat.columns = [column + '_' + entry for entry in train_entries]
    df_test_cat.columns = [column + '_' + entry for entry in test_entries]

    #categories found in train but not in test are added to test without activations
    for header in df_train_cat.columns:
      if header not in df_test_cat.columns:
        df_test_cat[header] = 0
    
    #selecting by the train headers aligns the column order
    #and drops test categories that weren't present in the training set
    df_test_cat = df_test_cat[df_train_cat.columns]

    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)
    
    #concatenate the encoded set with the rest of the data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)
    
    #the column associated with the plug value is dropped
    #so that missing entries are returned without an activation
    NAcolumn = column + '_zzzinfill'
    for df in (mdf_train, mdf_test):
      if NAcolumn in df.columns:
        del df[NAcolumn]
    if 'zzzinfill' in orig_labels_train:
      orig_labels_train.remove('zzzinfill')
    
    #list of the returned column headers
    textcolumns = list(df_train_cat.columns)
    if NAcolumn in textcolumns:
      textcolumns.remove(NAcolumn)
    textcolumns.sort()
    
    #dictionary of returned column : entry for later reference
    textlabelsdict = dict(zip(textcolumns, orig_labels_train))
    
    #change data types to 8-bit (1 byte) integers for memory savings
    for header in textcolumns:
      mdf_train[header] = mdf_train[header].astype(np.int8)
      mdf_test[header] = mdf_test[header].astype(np.int8)

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for tc in textcolumns:
    
      #ratio of activations in the train set, collected for driftreport
      activation_ratio = mdf_train[tc].sum() / mdf_train[tc].shape[0]

      textnormalization_dict = {tc : {'textlabelsdict_text' : textlabelsdict, \
                                      tc + '_ratio' : activation_ratio}}
      
      column_dict_list.append({tc : {'category' : 'text', \
                                     'origcategory' : category, \
                                     'normalization_dict' : textnormalization_dict, \
                                     'origcolumn' : column, \
                                     'inputcolumn' : column, \
                                     'columnslist' : textcolumns, \
                                     'categorylist' : textcolumns, \
                                     'infillmodel' : False, \
                                     'infillcomplete' : False, \
                                     'suffixoverlap_results' : suffixoverlap_results, \
                                     'deletecolumn' : False}})
    
    return mdf_train, mdf_test, column_dict_list

  def _process_smth(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_smth(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #preprocess column with one hot encoding fit to the unique entries of mdf_train
    #followed by application of label smoothing
    #returned columns are named column + '_smth_' + str(i), where i is the position
    #of the entry among the train entries sorted by their string representation
    #
    #accepts params entries:
    #'str_convert' (defaults to False), when True entries are converted to strings
    #before encoding, eg 2 == '2'
    #'activation' (defaults to 0.9), the activation value passed to the smoothing,
    #intended as a float 0.5-1
    #'LSfit' (defaults to False), to activate fitted smoothing
    #'testsmooth' (defaults to False), when True smoothing is also applied to mdf_test
    #
    #missing entries are replaced with the plug value 'zzzinfill' before encoding
    #entries of mdf_test that are not found in mdf_train receive no activation
    #
    #returns mdf_train and mdf_test with the encoded columns concatenated
    #(the received column is left in place) and column_dict_list with one
    #column_dict per returned column
    #category is recorded as origcategory, postprocess_dict is not inspected here
    '''
    
    suffixoverlap_results = {}
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    str_convert = params.get('str_convert', False)
    activation = params.get('activation', 0.9)
    LSfit = params.get('LSfit', False)
    testsmooth = params.get('testsmooth', False)
    
    #temporary support column, it is removed again before returning
    tempcolumn = column + '_smth_'
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, tempcolumn, suffixoverlap_results)
    
    #the encoding is derived from a copy so that the received column is preserved
    mdf_train[tempcolumn] = mdf_train[column].copy()
    mdf_test[tempcolumn] = mdf_test[column].copy()

    #convert to category so that the plug value can be registered as a valid entry
    mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('category')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('category')

    if 'zzzinfill' not in mdf_train[tempcolumn].cat.categories:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[tempcolumn].cat.categories:
      mdf_test[tempcolumn] = mdf_test[tempcolumn].cat.add_categories(['zzzinfill'])

    #replace missing entries with the plug value
    mdf_train[tempcolumn] = mdf_train[tempcolumn].fillna('zzzinfill')
    mdf_test[tempcolumn] = mdf_test[tempcolumn].fillna('zzzinfill')

    if str_convert is True:
      #replace numerical with string equivalent
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype(str)
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype(str)
    else:
      mdf_train[tempcolumn] = mdf_train[tempcolumn].astype('object')
      mdf_test[tempcolumn] = mdf_test[tempcolumn].astype('object')

    #unique train entries, sorted by string representation so that
    #sets with mixed numeric and string entries can be ordered
    train_entries = sorted(mdf_train[tempcolumn].unique(), key=str)
    orig_labels_train = list(train_entries)

    #pandas one hot encoding doesn't sort integers and strings properly so using my own
    #note the test set is encoded against the train entries, which is why
    #entries only found in the test set receive no activation
    df_train_cat = pd.DataFrame(mdf_train[tempcolumn])
    df_test_cat = pd.DataFrame(mdf_test[tempcolumn])
    for entry in train_entries:
      df_train_cat[entry] = np.where(mdf_train[tempcolumn] == entry, 1, 0)
      df_test_cat[entry] = np.where(mdf_test[tempcolumn] == entry, 1, 0)
    del df_train_cat[tempcolumn]
    del df_test_cat[tempcolumn]
    
    #map each entry to the integer suffix header it will be returned with
    labels_dict = {entry : column + '_smth_' + str(i) for i, entry in enumerate(train_entries)}
    
    #reset the headers from the list of entries
    df_train_cat.columns = train_entries
    df_test_cat.columns = train_entries
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)

    #concatenate the encoded set with the rest of the data
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)

    del mdf_train[tempcolumn]    
    del mdf_test[tempcolumn]
    
    #delete the support column for the plug value when present
    #NOTE(review): the encoded headers at this point are the raw entries, so a plug
    #column would be headed 'zzzinfill' and is not matched by column + '_zzzinfill',
    #confirm whether the plug column was meant to be dropped here as in the text transform
    columnNAr2 = column + '_zzzinfill'
    if columnNAr2 in mdf_train.columns:
      del mdf_train[columnNAr2]
    if columnNAr2 in mdf_test.columns:
      del mdf_test[columnNAr2]
    if 'zzzinfill' in orig_labels_train:
      orig_labels_train.remove('zzzinfill')
    
    #list of the encoded headers, at this point still named after the entries
    textcolumns = list(df_train_cat)
    if columnNAr2 in textcolumns:
      textcolumns.remove(columnNAr2)
    
    #dictionary pairing the sorted headers with the train entries (excluding the plug value)
    textlabelsdict = dict(zip(sorted(textcolumns, key=str), orig_labels_train))
    
    #note that no int8 cast is applied in this transform
    #(presumably since the smoothed activations are not integers)

    #the entry based headers are stored in the normalization_dict as text_categorylist
    categorylist = textcolumns.copy()

    #now convert column headers from text convention to smth convention
    mdf_train = mdf_train.rename(columns=labels_dict)
    mdf_test  = mdf_test.rename(columns=labels_dict)
    
    textcolumns = [labels_dict[entry] for entry in textcolumns]
    
    inverse_labels_dict = {value:key for key,value in labels_dict.items()}
    
    #now apply label smoothing
    #the category passed to the smoothing function is kept distinct from the category
    #argument, which is the one recorded below as origcategory
    smoothing_category = 'smth'
    LSfitparams_dict = {}

    #tracks which of the columns have already been smoothed
    categorycomplete_dict = dict(zip(textcolumns, [False]*len(textcolumns)))

    for labelsmoothingcolumn in textcolumns:

      if categorycomplete_dict[labelsmoothingcolumn] is False:

        mdf_train, categorycomplete_dict, LSfitparams_dict = \
        self._apply_LabelSmoothing(mdf_train, 
                                  labelsmoothingcolumn, 
                                  activation, 
                                  textcolumns, 
                                  smoothing_category, 
                                  categorycomplete_dict, 
                                  LSfit, 
                                  LSfitparams_dict)

    #smoothing not applied to test data consistent with postmunge convention
    #(postmunge can apply based on traindata parameter or by activating testsmooth)
    if testsmooth is True:
      
      categorycomplete_test_dict = dict(zip(textcolumns, [False]*len(textcolumns)))
      
      for labelsmoothingcolumn in textcolumns:

        if categorycomplete_test_dict[labelsmoothingcolumn] is False:

          #the returned tracker is stored under the name inspected by this loop
          #so that a column is not smoothed a second time
          mdf_test, categorycomplete_test_dict = \
          self._postapply_LabelSmoothing(mdf_test, 
                                        labelsmoothingcolumn, 
                                        categorycomplete_test_dict, 
                                        LSfitparams_dict)
    
    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []
    
    for tc in textcolumns:
    
      #ratio of the column sum to the row count in the train set, collected for driftreport
      tc_ratio = tc + '_ratio'
      tcratio = mdf_train[tc].sum() / mdf_train[tc].shape[0]
      
      textnormalization_dict = {tc : {'textlabelsdict_smth' : textlabelsdict, \
                                      tc_ratio : tcratio, \
                                      'labels_dict' : labels_dict, \
                                      'inverse_labels_dict' : inverse_labels_dict, \
                                      'text_categorylist' : categorylist, \
                                      'str_convert' : str_convert, \
                                      'LSfitparams_dict' : LSfitparams_dict, \
                                      'activation' : activation, \
                                      'LSfit' : LSfit, \
                                      'testsmooth' : testsmooth, \
                                      'categorylist' : textcolumns}}
      
      column_dict = {tc : {'category' : 'smth', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : textcolumns, \
                           'categorylist' : textcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict)
    
    return mdf_train, mdf_test, column_dict_list

  def _process_lngt(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing function that returns the length of the string representation
    #of each entry, such as a heuristic for information content
    #entries are cast to string before measuring, so the default infill
    #is len(str(np.nan)) = 3
    #
    #accepts params entry 'inplace' (defaults to False)
    #when True the received column is renamed to column + '_lngt' instead of copied
    #
    #returns the dataframe with the column named column + '_lngt'
    #and column_dict_list with the corresponding column_dict
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    suffixoverlap_results = {}
    
    lngt_column = column + '_lngt'
    
    inplace = params.get('inplace', False)
    
    if inplace is True:
      
      #the received column is replaced, so only the new header needs to be checked
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, lngt_column, suffixoverlap_results)
      
      df.rename(columns = {column : lngt_column}, inplace = True)
    
    else:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, lngt_column, suffixoverlap_results)
    
    #length of each entry after casting to string
    df[lngt_column] = df[lngt_column].astype(str).transform(len)
    
    #driftreport metrics of the returned column: maximum, minimum, mean, standard deviation
    lengths = df[lngt_column]
    normalization_dict = {lngt_column : {'maximum' : lengths.max(), \
                                         'minimum' : lengths.min(), \
                                         'mean' : lengths.mean(), \
                                         'std' : lengths.std() }}

    #list of returned columns
    columns = [lngt_column]

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = [ { nc : {'category' : 'lngt', \
                                 'origcategory' : category, \
                                 'normalization_dict' : normalization_dict, \
                                 'origcolumn' : column, \
                                 'inputcolumn' : column, \
                                 'columnslist' : columns, \
                                 'categorylist' : columns, \
                                 'infillmodel' : False, \
                                 'infillcomplete' : False, \
                                 'suffixoverlap_results' : suffixoverlap_results, \
                                 'deletecolumn' : False}} \
                         for nc in columns ]

    return df, column_dict_list
  
  def _process_UPCS(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing function that converts entries of a column to uppercase strings
    #such as to allow consistent encoding if data has upper/lower case discrepancies
    #entries that are not equal to themselves (such as nan) are passed through unchanged
    #
    #accepts params entries:
    #'inplace' (defaults to False), when True the received column is renamed
    #to column + '_UPCS' instead of copied
    #'activate' (defaults to True), when not True the entries are returned unconverted
    #
    #returns the dataframe with the column named column + '_UPCS'
    #and column_dict_list with the corresponding column_dict
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    suffixoverlap_results = {}
    
    UPCS_column = column + '_UPCS'
    
    inplace = params.get('inplace', False)
    activate = params.get('activate', True)
    
    if inplace is True:
      
      #the received column is replaced, so only the new header needs to be checked
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, UPCS_column, suffixoverlap_results)
      
      df.rename(columns = {column : UPCS_column}, inplace = True)
    
    else:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, UPCS_column, suffixoverlap_results)
    
    #convert to uppercase string based on activate parameter
    if activate is True:
      
      #the self comparison is False for nan, which keeps those entries out of the conversion
      #first cast the remaining entries to string
      is_self_equal = df[UPCS_column] == df[UPCS_column]
      df[UPCS_column] = \
      np.where(is_self_equal, df[UPCS_column].astype(str), df[UPCS_column])
      
      #then convert the strings to uppercase
      is_self_equal = df[UPCS_column] == df[UPCS_column]
      df[UPCS_column] = \
      np.where(is_self_equal, df[UPCS_column].str.upper(), df[UPCS_column])

    #list of returned columns
    UPCScolumns = [UPCS_column]

    #the activate setting is the only entry recorded for this transform
    normalization_dict = {UPCS_column : {'activate' : activate}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = [ { nc : {'category' : 'UPCS', \
                                 'origcategory' : category, \
                                 'normalization_dict' : normalization_dict, \
                                 'origcolumn' : column, \
                                 'inputcolumn' : column, \
                                 'columnslist' : UPCScolumns, \
                                 'categorylist' : UPCScolumns, \
                                 'infillmodel' : False, \
                                 'infillcomplete' : False, \
                                 'suffixoverlap_results' : suffixoverlap_results, \
                                 'deletecolumn' : False}} \
                         for nc in UPCScolumns ]

    return df, column_dict_list

  def _process_splt(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_splt(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies overlaps of subsets of those strings and records
    #as a new boolan column
    #for example, if a categoical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a new column would be created idenitifying cells which included 
    #'north' in their entries
    #(here for north and northeast)
    #returns as column titled origcolumn_splt_entry    
    #missing values are ignored by default
    '''
    
    suffixoverlap_results = {}
    
    #overlap_lengths = [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7 , 6, 5]
    
    if 'minsplit' in params:
      minsplit = params['minsplit'] - 1
    else:
      minsplit = 4
      
    if 'space_and_punctuation' in params:
      space_and_punctuation = params['space_and_punctuation']
    else:
      space_and_punctuation = True
      
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = [' ', ',', '.', '?', '!', '(', ')']
      
    if 'concurrent_activations' in params:
      concurrent_activations = params['concurrent_activations']
    else:
      concurrent_activations = False
      
    if 'int_headers' in params:
      int_headers = params['int_headers']
    else:
      int_headers = False
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_splt'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:

        len_unique = len(unique)

        if len_unique >= overlap_length:

          nbr_iterations = len_unique - overlap_length + 1

          for i in range(nbr_iterations):

            extract = unique[i:(overlap_length+i)]

            extract_already_in_overlap_dict = False

            for key in overlap_dict:

              len_key = len(key)

              if len_key >= overlap_length:

                nbr_iterations3 = len_key - overlap_length + 1

                for k in range(nbr_iterations3):

                  extract3 = key[k:(overlap_length+k)]
                  
                  if concurrent_activations is False:

                    if extract == extract3:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                  elif concurrent_activations is True:
                    
                    if extract == extract3 and unique in overlap_dict[key]:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                if extract_already_in_overlap_dict is True:
                  
                  break

            if extract_already_in_overlap_dict is False:

              for unique2 in unique_list:

                if unique2 != unique:

                  len_unique2 = len(unique2)

                  nbr_iterations2 = len_unique2 - overlap_length + 1

                  for j in range(nbr_iterations2):

                    extract2 = unique2[j:(overlap_length+j)]

                    #________
                    
                    if space_and_punctuation is True:

                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            if concurrent_activations is False:

                              break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            if concurrent_activations is False:

                              break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          if concurrent_activations is False:

                            break
                          
                    elif space_and_punctuation is False:
                      
                      for scrub_punctuation in excluded_characters:
                        
                        extract2 = extract2.replace(scrub_punctuation, '')
                        
                      #if any punctuation was scrubbed these two extracts will be different lengths
                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            if concurrent_activations is False:

                              break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            if concurrent_activations is False:

                              break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          if concurrent_activations is False:

                            break
     
    #now for mdf_test we'll only consider those overlaps already identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:

      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                if concurrent_activations is False:

                  break
    
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + suffix + '_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = {}
      i = 0
      for entry in newcolumns:
        int_labels_dict.update({entry : column + suffix + '_' + str(i)})
        i += 1
        
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
        
      #now convert column headers from string to int convention
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      inverse_int_labels_dict = {value:key for key,value in int_labels_dict.items()}
      for key in inverse_int_labels_dict:
        inverse_int_labels_dict[key] = inverse_int_labels_dict[key][len(column) + 1:]
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'overlap_dict' : overlap_dict, \
                                      'splt_newcolumns_splt'   : newcolumns, \
                                      'minsplit' : minsplit, \
                                      'concurrent_activations' : concurrent_activations, \
                                      'preint_newcolumns' : preint_newcolumns, \
                                      'int_headers' : int_headers, \
                                      'int_labels_dict' : int_labels_dict, \
                                      'inverse_int_labels_dict' : inverse_int_labels_dict}}
      
      column_dict = {tc : {'category' : 'splt', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_spl2(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_spl2(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies character overlaps (shared substrings) between the unique
    #entries of the train set and replaces entries with their reduced overlap
    #when consolidate_nonoverlaps is True entries without an overlap 
    #are replaced with 0 (the spl5 convention), otherwise they are retained
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a new column would be created in which the entry 'north' 
    #replaced cells with north in their entries
    #(here for north and northeast)
    #and with consolidate_nonoverlaps cells with west would be set to 0
    #returns as single column titled column + suffix
    #entries are compared in their str() form, there is no special 
    #treatment of missing values
    #this alternative to splt may be beneficial for instance if one wanted 
    #to follow with an ordl encoding
    #
    #accepts params (all optional):
    #minsplit: shortest overlap length that will be recorded, defaults to 5
    #  (stored internally and in the normalization_dict as minsplit - 1)
    #space_and_punctuation: when False, overlaps containing any of the
    #  excluded_characters are not recorded, defaults to True
    #excluded_characters: list of characters scrubbed when 
    #  space_and_punctuation is False
    #suffix: suffix appended to column for returned header, defaults to '_spl2'
    #test_same_as_train: when True the overlap assignments derived from 
    #  the train set are reused as is for the test set, defaults to False
    #consolidate_nonoverlaps: when True entries without overlap are 
    #  replaced with 0, defaults to False
    #
    #postprocess_dict is accepted for signature consistency and not inspected
    #params is only read (never mutated) so the shared default dict is safe
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #note minsplit is stored as one less than the shortest accepted overlap
    minsplit = params['minsplit'] - 1 if 'minsplit' in params else 4
    space_and_punctuation = params.get('space_and_punctuation', True)
    excluded_characters = \
    params.get('excluded_characters', [' ', ',', '.', '?', '!', '(', ')'])
    suffix = params.get('suffix', '_spl2')
    test_same_as_train = params.get('test_same_as_train', False)
    consolidate_nonoverlaps = params.get('consolidate_nonoverlaps', False)
    
    #first we find overlaps from mdf_train
    unique_list = list(map(str, mdf_train[column].unique()))
    
    #default = 0 so a train set without any rows yields no overlaps 
    #instead of a ValueError from max() on an empty sequence
    maxlength = max((len(x) for x in unique_list), default = 0)
    
    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}
    overlap_dict = {}
    
    #we'll cycle through the overlap lengths from longest to shortest and 
    #only record an overlap if it is not a subset of those already recorded
    for overlap_length in range(maxlength - 1, minsplit, -1):
      
      for unique in unique_list:
        
        #this range is empty when unique is shorter than overlap_length
        for i in range(len(unique) - overlap_length + 1):
          
          extract = unique[i:(overlap_length+i)]
          
          #substring test is equivalent to comparing extract against 
          #every slice of length overlap_length of each recorded key
          if any(extract in key for key in overlap_dict):
            continue
          
          for unique2 in unique_list:
            
            if unique2 == unique:
              continue
            
            for j in range(len(unique2) - overlap_length + 1):
              
              extract2 = unique2[j:(overlap_length+j)]
              
              if space_and_punctuation is False:
                #if any punctuation was scrubbed the two extracts will be 
                #different lengths and so will not match
                for scrub_punctuation in excluded_characters:
                  extract2 = extract2.replace(scrub_punctuation, '')
              elif space_and_punctuation is not True:
                #no search is performed for values other than True / False
                break
              
              if extract2 != extract:
                continue
              
              if extract not in overlap_dict:
                overlap_dict[extract] = [unique, unique2]
                break
              
              overlap_entries = overlap_dict[extract]
              
              if unique2 not in overlap_entries:
                overlap_entries.append(unique2)
                break
              
              if unique not in overlap_entries:
                overlap_entries.append(unique)
                break
    
    #now for mdf_test we'll only consider those overlaps already 
    #identified from train set
    unique_list_test = None
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    else:
      #any other value falls back to scanning the test entries so that 
      #test_overlap_dict is always defined
      unique_list_test = list(map(str, mdf_test[column].unique()))
      test_overlap_dict = \
      {key : [entry for entry in unique_list_test if key in entry] for key in overlap_dict}
    
    #so that was all comparable to splt, now for spl2 we'll create a new 
    #dictionary structured as
    #{original unique value : overlap extract for replacement}
    
    #since one original unique value may have entries as multiple overlaps, 
    #we'll prioritize overlaps with
    #longer string lengths and then alphabetical
    #(the sort by length is stable so alphabetical order is kept among ties)
    spl2_overlap_dict = {}
    
    for overlap_key in sorted(sorted(overlap_dict), key = len, reverse = True):
      for entry in overlap_dict[overlap_key]:
        spl2_overlap_dict.setdefault(entry, overlap_key)
    
    #here's where we identify values to set to 0 for spl5
    spl5_zero_dict = {}
    if consolidate_nonoverlaps is True:
      spl5_zero_dict = \
      {entry : 0 for entry in unique_list if entry not in spl2_overlap_dict}
    
    #then we'll do same for test set
    spl2_test_overlap_dict = {}
    
    for overlap_key in sorted(sorted(test_overlap_dict), key = len, reverse = True):
      for entry in test_overlap_dict[overlap_key]:
        spl2_test_overlap_dict.setdefault(entry, overlap_key)
    
    #here's where we identify values to set to 0 for spl5
    spl5_test_zero_dict = {}
    if consolidate_nonoverlaps is True:
      
      #the test entries were not yet collected when test_same_as_train is True
      if unique_list_test is None:
        unique_list_test = list(map(str, mdf_test[column].unique()))
      
      spl5_test_zero_dict = \
      {entry : 0 for entry in unique_list_test if entry not in spl2_test_overlap_dict}
    
    newcolumn = column + suffix
    
    #copy source column into new column (also checks for suffix overlap)
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
    
    mdf_test[newcolumn] = mdf_test[column].copy()
    
    #replacement dictionaries are keyed by the str form of entries
    mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
    mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)
    
    mdf_train[newcolumn] = mdf_train[newcolumn].replace(spl2_overlap_dict)
    mdf_train[newcolumn] = mdf_train[newcolumn].replace(spl5_zero_dict)
    
    mdf_test[newcolumn] = mdf_test[newcolumn].replace(spl2_test_overlap_dict)
    mdf_test[newcolumn] = mdf_test[newcolumn].replace(spl5_test_zero_dict)
    
    newcolumns = [newcolumn]
    
    column_dict_list = []
    
    for tc in newcolumns:
      
      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'consolidate_nonoverlaps' : consolidate_nonoverlaps, \
                                      'overlap_dict' : overlap_dict, \
                                      'spl2_newcolumns'   : newcolumns, \
                                      'spl2_overlap_dict' : spl2_overlap_dict, \
                                      'spl2_test_overlap_dict' : spl2_test_overlap_dict, \
                                      'spl5_zero_dict' : spl5_zero_dict, \
                                      'minsplit' : minsplit}}
      
      column_dict = {tc : {'category' : 'spl2', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
      
      column_dict_list.append(column_dict)
    
    return mdf_train, mdf_test, column_dict_list

  def _process_sp19(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_splt(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies overlaps of subsets of those strings and records
    #as a new boolan column
    #for example, if a categoical set consisted of unique values 
    #['west', 'north', 'northeast']
    #then a new column would be created idenitifying cells which included 
    #'north' in their entries
    #(here for north and northeast)
    #returns as column titled origcolumn_splt_entry    
    #missing values are ignored by default
    
    #sp15 is comparable to splt but multiple concurrent activations allowed
    #so requires a different MLinfilltype in processdict
    
    #sp19 is comparable to sp15 but with a returned binary encoding aggregation
    '''
    
    suffixoverlap_results = {}
    
    #overlap_lengths = [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7 , 6, 5]
    
    if 'minsplit' in params:
      minsplit = params['minsplit'] - 1
    else:
      minsplit = 4
      
    if 'space_and_punctuation' in params:
      space_and_punctuation = params['space_and_punctuation']
    else:
      space_and_punctuation = True
      
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = [' ', ',', '.', '?', '!', '(', ')']
      
    if 'int_headers' in params:
      int_headers = params['int_headers']
    else:
      int_headers = False
      
    #note that same MLinfilltype in processdict ('1010')
    #may be used for both configurations but applying concurrent_activations = False
    #with sp11 is less efficient then running splt
    if 'concurrent_activations' in params:
      concurrent_activations = params['concurrent_activations']
    else:
      concurrent_activations = True
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_sp19'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:

        len_unique = len(unique)

        if len_unique >= overlap_length:

          nbr_iterations = len_unique - overlap_length + 1

          for i in range(nbr_iterations):

            extract = unique[i:(overlap_length+i)]

            extract_already_in_overlap_dict = False

            for key in overlap_dict:

              len_key = len(key)

              if len_key >= overlap_length:

                nbr_iterations3 = len_key - overlap_length + 1

                for k in range(nbr_iterations3):

                  extract3 = key[k:(overlap_length+k)]
                  
                  if concurrent_activations is False:

                    if extract == extract3:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                  elif concurrent_activations is True:
                    
                    if extract == extract3 and unique in overlap_dict[key]:

                      extract_already_in_overlap_dict = True
                      
                      break
                      
                if extract_already_in_overlap_dict is True:
                  
                  break

            if extract_already_in_overlap_dict is False:

              for unique2 in unique_list:

                if unique2 != unique:

                  len_unique2 = len(unique2)

                  nbr_iterations2 = len_unique2 - overlap_length + 1

                  for j in range(nbr_iterations2):

                    extract2 = unique2[j:(overlap_length+j)]

                    #________
                    
                    if space_and_punctuation is True:

                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            if concurrent_activations is False:

                              break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            if concurrent_activations is False:

                              break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          if concurrent_activations is False:

                            break
                          
                    elif space_and_punctuation is False:
                      
                      for scrub_punctuation in excluded_characters:
                        
                        extract2 = extract2.replace(scrub_punctuation, '')
                        
                      #if any punctuation was scrubbed these two extracts will be different lengths
                      if extract2 == extract:

                        if extract in overlap_dict:

                          if unique2 not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique2)
                            
                            if concurrent_activations is False:

                              break

                          if unique not in overlap_dict[extract]:

                            overlap_dict[extract].append(unique)
                            
                            if concurrent_activations is False:

                              break

                        #else if we don't have a key for extract
                        else:

                          overlap_dict.update({extract : [unique, unique2]})
                          
                          if concurrent_activations is False:

                            break
        
    #now for mdf_test we'll only consider those overlaps already identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:

      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                if concurrent_activations is False:

                  break
    
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + '_sp15_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
#       mdf_train[newcolumn] = mdf_train[column].copy()
      
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = {}
      i = 0
      for entry in newcolumns:
        int_labels_dict.update({entry : column + '_sp15_' + str(i)})
        i += 1
        
      #now convert column headers from string to int convention
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      newcolumns = [int_labels_dict[entry] for entry in newcolumns]

      inverse_int_labels_dict = {value:key for key,value in int_labels_dict.items()}
      for key in inverse_int_labels_dict:
        inverse_int_labels_dict[key] = inverse_int_labels_dict[key][len(column) + 1:]
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []
    
    #begin binary encoding of set, leaving the int_headers here out of convenience, not really needed
    
    if len(newcolumns) > 0:
      
      sp19_column = column + suffix
    
      #aggregate collection of activations as string set
      #the suffix 'activations_' is to avoid potential of overlap with binary encoding and aggregated activations
      mdf_train[sp19_column] = 'activations_'
      mdf_test[sp19_column] = 'activations_'

      for entry in newcolumns:
        mdf_train[sp19_column] = mdf_train[sp19_column] + mdf_train[entry].astype(str)
        mdf_test[sp19_column] = mdf_test[sp19_column] + mdf_test[entry].astype(str)

      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = list(mdf_train[sp19_column].unique())
      labels_train.sort()
      labels_test = list(mdf_test[sp19_column].unique())
      labels_test.sort()

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
        labels_train.sort()
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
        labels_test.sort()

      #get length of the list
      listlength = len(labels_train)

      #calculate number of columns we'll need
      binary_column_count = int(np.ceil(np.log2(listlength)))

      #initialize dictionaryt to store encodings
      binary_encoding_dict = {}
      encoding_list = []

      for i in range(listlength):

        #this converts the integer i to binary encoding
        #where f is an f string for inserting the column coount into the string to designate length of encoding
        #0 is to pad out the encoding with 0's for the length
        #and b is telling it to convert to binary 
        #note this returns a string
        encoding = format(i, f"0{binary_column_count}b")

        if i < len(labels_train):

          #store the encoding in a dictionary
          binary_encoding_dict.update({labels_train[i] : encoding})

          #store the encoding in a list for checking in next step
          encoding_list.append(encoding)


      #clear up memory
      del encoding_list
  #     del overlap_list

      #new driftreport metric _1010_activations_dict
      _1010_activations_dict = {}
      for key in binary_encoding_dict:
        sumcalc = (mdf_train[sp19_column] == key).sum() 
        ratio = sumcalc / mdf_train[sp19_column].shape[0]
        _1010_activations_dict.update({key:ratio})


      #replace the cateogries in train set via ordinal trasnformation
      mdf_train[sp19_column] = mdf_train[sp19_column].replace(binary_encoding_dict) 

      #in test set, we'll need to strike any categories that weren't present in train
      #first let'/s identify what applies
      testspecificcategories = list(set(labels_test)-set(labels_train))

      #so we'll just replace those items with our plug value
      testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
      mdf_test[sp19_column] = mdf_test[sp19_column].replace(testplug_dict)  

      #now we'll apply the 1010 transformation to the test set
      mdf_test[sp19_column] = mdf_test[sp19_column].replace(binary_encoding_dict)    

      #ok let's create a list of columns to store each entry of the binary encoding
      _1010_columnlist = []

      for i in range(binary_column_count):

        _1010_columnlist.append(column + suffix + '_' + str(i))

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)

      #now let's store the encoding
      i=0
      for _1010_column in _1010_columnlist:

        mdf_train[_1010_column] = mdf_train[sp19_column].str.slice(i,i+1).astype(np.int8)

        mdf_test[_1010_column] = mdf_test[sp19_column].str.slice(i,i+1).astype(np.int8)

        i+=1

      #now delete the support column
      del mdf_train[sp19_column]
      del mdf_test[sp19_column]

      for entry in newcolumns:
        del mdf_train[entry]
        del mdf_test[entry]

      #now store the column_dict entries
      categorylist = _1010_columnlist

      column_dict_list = []

      for tc in categorylist:

        #                                   '_1010_overlap_replace' : overlap_replace, \
        normalization_dict = {tc : {'suffix' : suffix, \
                                    'test_same_as_train' : test_same_as_train, \
                                    '_1010_binary_encoding_dict' : binary_encoding_dict, \
                                    '_1010_binary_column_count' : binary_column_count, \
                                    '_1010_activations_dict' : _1010_activations_dict, \
                                    'categorylist' : categorylist, \
                                    'overlap_dict' : overlap_dict, \
                                    'splt_newcolumns_sp19'   : newcolumns, \
                                    'minsplit' : minsplit, \
                                    'concurrent_activations' : concurrent_activations, \
                                    'preint_newcolumns' : preint_newcolumns, \
                                    'int_headers' : int_headers, \
                                    'int_labels_dict' : int_labels_dict, \
                                    'inverse_int_labels_dict' : inverse_int_labels_dict}}

        column_dict = {tc : {'category' : 'sp19', \
                             'origcategory' : category, \
                             'normalization_dict' : normalization_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
      
    else:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_sbst(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbst(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies cases where a full unique value is present 
    #as a subset of a longer length unique value
    #and returns one-hot activations to aggregate those cases
    
    #accepts parameters concurrent_activations to allow mulitple activations
    #actually let's make concurrent activations the default
    #and int_headers for privacy preserving headers
    
    #this differs from other string parsing functions in that
    #only complete entries are checked for presence as subsets in other entries
    '''
    
    suffixoverlap_results = {}
      
    if 'concurrent_activations' in params:
      concurrent_activations = params['concurrent_activations']
    else:
      concurrent_activations = True
      
    if 'int_headers' in params:
      int_headers = params['int_headers']
    else:
      int_headers = False

    if 'minsplit' in params:
      minsplit = params['minsplit']
    else:
      minsplit = 1
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_sbst'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
    unique_list = sorted(unique_list, key=len, reverse=True)
    
#     maxlength = max(len(x) for x in unique_list)
    
#     minlength = min(len(x) for x in unique_list)
    
#     overlap_lengths = list(range(maxlength - 1, minlength, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    #unique is what we are searching for
    for unique in unique_list:
      len_unique = len(unique)

      if len_unique >= minsplit:
      
        #unique2 is where we are searching
        for unique2 in unique_list:
          len_unique2 = len(unique2)
          
          if len_unique2 > len_unique:
            
            nbr_iterations = len_unique2 - len_unique + 1
            
            for i in range(nbr_iterations):
              
              extract = unique2[i:(len_unique+i)]
              
              extract_already_in_overlap_dict = False
                    
              if extract_already_in_overlap_dict is False:
                
                if extract == unique:
                  
                  if extract in overlap_dict:
                    
                    if unique2 not in overlap_dict[extract]:
                      
                      overlap_dict[extract].append(unique2)
                      
                      if concurrent_activations is False:

                        break
                        
                    # if unique not in overlap_dict[extract]:
                      
                    #   overlap_dict[extract].append(unique)

                    #   if concurrent_activations is False:

                    #     break
                        
                  #else if we don't have a key for extract
                  else:

                    overlap_dict.update({extract : [unique, unique2]})

                    if concurrent_activations is False:

                      break
                    
    #now for mdf_test we'll only consider those overlaps already identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:

      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      unique_list_test = sorted(unique_list_test, key=len, reverse=True)

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                if concurrent_activations is False:

                  break
                
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + suffix + '_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
#       mdf_train[newcolumn] = mdf_train[column].copy()
  
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = {}
      i = 0
      for entry in newcolumns:
        int_labels_dict.update({entry : column + suffix + '_' + str(i)})
        i += 1
        
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
        
      #now convert column headers from string to int convention
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      inverse_int_labels_dict = {value:key for key,value in int_labels_dict.items()}
      for key in inverse_int_labels_dict:
        inverse_int_labels_dict[key] = inverse_int_labels_dict[key][len(column) + 1:]
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'suffix' : suffix, \
                                      'test_same_as_train' : test_same_as_train, \
                                      'overlap_dict' : overlap_dict, \
                                      'splt_newcolumns_sbst'   : newcolumns, \
                                      'minsplit' : minsplit, \
                                      'concurrent_activations' : concurrent_activations, \
                                      'preint_newcolumns' : preint_newcolumns, \
                                      'int_headers' : int_headers, \
                                      'int_labels_dict' : int_labels_dict, \
                                      'inverse_int_labels_dict' : inverse_int_labels_dict}}
      
      column_dict = {tc : {'category' : 'sbst', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_sbs3(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbst(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #identifies cases where a full unique value is present 
    #as a subset of a longer length unique value
    #and returns one-hot activations to aggregate those cases
    
    #accepts parameters concurrent_activations to allow mulitple activations
    #actually let's make concurrent activations the default
    #and int_headers for privacy preserving headers
    
    #this differs from other string parsing functions in that
    #only complete entries are checked for presence as subsets in other entries
    
    #sbs3 is comparable to sbst but with a returned binary encoding aggregation
    '''
    
    suffixoverlap_results = {}
      
    if 'concurrent_activations' in params:
      concurrent_activations = params['concurrent_activations']
    else:
      concurrent_activations = True
      
    if 'int_headers' in params:
      int_headers = params['int_headers']
    else:
      int_headers = False

    if 'minsplit' in params:
      minsplit = params['minsplit']
    else:
      minsplit = 1
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_sbs3'
      
    if 'test_same_as_train' in params:
      test_same_as_train = params['test_same_as_train']
    else:
      test_same_as_train = False
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
    unique_list = sorted(unique_list, key=len, reverse=True)
    
#     maxlength = max(len(x) for x in unique_list)
    
#     minlength = min(len(x) for x in unique_list)
    
#     overlap_lengths = list(range(maxlength - 1, minlength, -1))

    overlap_dict = {}

    #we'll populate overlap_dict as
    #{extract_with_overlap : [list of associate categories with that overlap]}

    #we'll cycle through the overlap lengths and only record an overlap 
    #if it is not a subset of those already recorded
    
    #unique is what we are searching for
    for unique in unique_list:
      len_unique = len(unique)

      if len_unique >= minsplit:
      
        #unique2 is where we are searching
        for unique2 in unique_list:
          len_unique2 = len(unique2)
          
          if len_unique2 > len_unique:
            
            nbr_iterations = len_unique2 - len_unique + 1
            
            for i in range(nbr_iterations):
              
              extract = unique2[i:(len_unique+i)]
              
              extract_already_in_overlap_dict = False
                    
              if extract_already_in_overlap_dict is False:
                
                if extract == unique:
                  
                  if extract in overlap_dict:
                    
                    if unique2 not in overlap_dict[extract]:
                      
                      overlap_dict[extract].append(unique2)
                      
                      if concurrent_activations is False:

                        break
                        
                    # if unique not in overlap_dict[extract]:
                      
                    #   overlap_dict[extract].append(unique)

                    #   if concurrent_activations is False:

                    #     break
                        
                  #else if we don't have a key for extract
                  else:

                    overlap_dict.update({extract : [unique, unique2]})

                    if concurrent_activations is False:

                      break
                    
    #now for mdf_test we'll only consider those overlaps already identified from train set
    
    if test_same_as_train is True:
      test_overlap_dict = overlap_dict
    
    elif test_same_as_train is False:

      unique_list_test = list(mdf_test[column].unique())

      unique_list_test = list(map(str, unique_list_test))

      unique_list_test = sorted(unique_list_test, key=len, reverse=True)

      test_overlap_dict = {}

      train_keys = list(overlap_dict)

      train_keys.sort(key = len, reverse=True)

      for key in train_keys:

        test_overlap_dict.update({key:[]})

      for dict_key in train_keys:

        for unique_test in unique_list_test:

          len_key = len(dict_key)

          if len(unique_test) >= len_key:

            nbr_iterations4 = len(unique_test) - len_key + 1

            for l in range(nbr_iterations4):

              extract4 = unique_test[l:(len_key+l)]

              if extract4 == dict_key:

                test_overlap_dict[dict_key].append(unique_test)

                if concurrent_activations is False:

                  break
                
    newcolumns = []

    for dict_key in overlap_dict:

      newcolumn = column + '_sbst_' + dict_key
      
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
      
#       mdf_train[newcolumn] = mdf_train[column].copy()
  
      mdf_test[newcolumn] = mdf_test[column].copy()

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

      mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      
      mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

      newcolumns.append(newcolumn)
      
    preint_newcolumns = newcolumns.copy()
      
    if int_headers is True:
      
      int_labels_dict = {}
      i = 0
      for entry in newcolumns:
        int_labels_dict.update({entry : column + '_sbst_' + str(i)})
        i += 1
        
      newcolumns = [int_labels_dict[entry] for entry in newcolumns]
        
      #now convert column headers from string to int convention
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
      
      mdf_train = mdf_train.rename(columns=int_labels_dict)
      mdf_test  = mdf_test.rename(columns=int_labels_dict)

      inverse_int_labels_dict = {value:key for key,value in int_labels_dict.items()}
      for key in inverse_int_labels_dict:
        inverse_int_labels_dict[key] = inverse_int_labels_dict[key][len(column) + 1:]
        
    else:
      int_labels_dict = False
      inverse_int_labels_dict = False
    
    column_dict_list = []
    
    #begin binary encoding of set, leaving the int_headers here out of convenience, not really needed
    
    if len(newcolumns) > 0:
      
      sbs3_column = column + suffix

      #aggregate collection of activations as string set
      #the suffix 'activations_' is to avoid potential of overlap with binary encoding and aggregated activations
      mdf_train[sbs3_column] = 'activations_'
      mdf_test[sbs3_column] = 'activations_'

      for entry in newcolumns:
        mdf_train[sbs3_column] = mdf_train[sbs3_column] + mdf_train[entry].astype(str)
        mdf_test[sbs3_column] = mdf_test[sbs3_column] + mdf_test[entry].astype(str)

      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = list(mdf_train[sbs3_column].unique())
      labels_train.sort()
      labels_test = list(mdf_test[sbs3_column].unique())
      labels_test.sort()

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
        labels_train.sort()
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
        labels_test.sort()

      #get length of the list
      listlength = len(labels_train)

      #calculate number of columns we'll need
      binary_column_count = int(np.ceil(np.log2(listlength)))

      #initialize dictionaryt to store encodings
      binary_encoding_dict = {}
      encoding_list = []

      for i in range(listlength):

        #this converts the integer i to binary encoding
        #where f is an f string for inserting the column coount into the string to designate length of encoding
        #0 is to pad out the encoding with 0's for the length
        #and b is telling it to convert to binary 
        #note this returns a string
        encoding = format(i, f"0{binary_column_count}b")

        if i < len(labels_train):

          #store the encoding in a dictionary
          binary_encoding_dict.update({labels_train[i] : encoding})

          #store the encoding in a list for checking in next step
          encoding_list.append(encoding)


      #clear up memory
      del encoding_list
  #     del overlap_list

      #new driftreport metric _1010_activations_dict
      _1010_activations_dict = {}
      for key in binary_encoding_dict:
        sumcalc = (mdf_train[sbs3_column] == key).sum() 
        ratio = sumcalc / mdf_train[sbs3_column].shape[0]
        _1010_activations_dict.update({key:ratio})


      #replace the cateogries in train set via ordinal trasnformation
      mdf_train[sbs3_column] = mdf_train[sbs3_column].replace(binary_encoding_dict) 

      #in test set, we'll need to strike any categories that weren't present in train
      #first let'/s identify what applies
      testspecificcategories = list(set(labels_test)-set(labels_train))

      #so we'll just replace those items with our plug value
      testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
      mdf_test[sbs3_column] = mdf_test[sbs3_column].replace(testplug_dict)  

      #now we'll apply the 1010 transformation to the test set
      mdf_test[sbs3_column] = mdf_test[sbs3_column].replace(binary_encoding_dict)    

      #ok let's create a list of columns to store each entry of the binary encoding
      _1010_columnlist = []

      for i in range(binary_column_count):

        _1010_columnlist.append(column + suffix + '_' + str(i))

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)

      #now let's store the encoding
      i=0
      for _1010_column in _1010_columnlist:

        mdf_train[_1010_column] = mdf_train[sbs3_column].str.slice(i,i+1).astype(np.int8)

        mdf_test[_1010_column] = mdf_test[sbs3_column].str.slice(i,i+1).astype(np.int8)

        i+=1

      #now delete the support column
      del mdf_train[sbs3_column]
      del mdf_test[sbs3_column]

      for entry in newcolumns:
        del mdf_train[entry]
        del mdf_test[entry]

      #now store the column_dict entries
      categorylist = _1010_columnlist

      column_dict_list = []

      for tc in categorylist:

  #                                   '_1010_overlap_replace' : overlap_replace, \
        normalization_dict = {tc : {'suffix' : suffix, \
                                    'test_same_as_train' : test_same_as_train, \
                                    '_1010_binary_encoding_dict' : binary_encoding_dict, \
                                    '_1010_binary_column_count' : binary_column_count, \
                                    '_1010_activations_dict' : _1010_activations_dict, \
                                    'categorylist' : categorylist, \
                                    'overlap_dict' : overlap_dict, \
                                    'splt_newcolumns_sbs3'   : newcolumns, \
                                    'concurrent_activations' : concurrent_activations, \
                                    'minsplit' : minsplit, \
                                    'preint_newcolumns' : preint_newcolumns, \
                                    'int_headers' : int_headers, \
                                    'int_labels_dict' : int_labels_dict, \
                                    'inverse_int_labels_dict' : inverse_int_labels_dict}}

        column_dict = {tc : {'category' : 'sbs3', \
                             'origcategory' : category, \
                             'normalization_dict' : normalization_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
      
    else:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_hash(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #applies the "hashing trick" to encode categoric sets
    #returning a set of columns with integers corresponding to words from set vocabulary
    #this is intended for sets with very high cardinality
    #note that the same integer may be returned in different columns 
    #for same word found in different entries
    #works by segregating entries into a list of words based on space seperator
    #stripping any special characters
    #and hashing each word with hashlib md5 hashing algorithm
    #which is converted to integer and taken remainder from a division by vocab_size
    #where vocab_size is based on heuristic
    #the heuristic derives vocab_size based on number of unique entries found in train set times the multipler
    #where if that result is greater than the cap then the heuristic reverts to the cap as vocab_size
    #where for hash the number of unique entries is calculated after extracting words from entries
    #note that if vocab_size is not large enough some of words may be returned with encoding overlap
    #returns set of columns with suffix appenders '_hash_#' where # is integer
    #unless only returning one column then suffix appender is just '_hash'
    #note that entries with fewer words than max word count are padded out with 0
    #also accepts parameter for excluded_characters, space
    #uppercase conversion if desired is performed externally by the UPCS transform
    #if space passed as '' then word extraction doesn't take place
    #user can manually specify a vocab_size with vocab-size parameter
    """
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    if 'vocab_size' in params:
      vocab_size = params['vocab_size']
    else:
      vocab_size = False
      
    if 'heuristic_multiplier' in params:
      heuristic_multiplier = params['heuristic_multiplier']
    else:
      heuristic_multiplier = 2
      
    if 'heuristic_cap' in params:
      heuristic_cap = params['heuristic_cap']
    else:
      heuristic_cap = 1024
    
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = [',', '.', '?', '!', '(', ')']

    if 'space' in params:
      space = params['space']
    else:
      space = ' '

    if 'max_column_count' in params:
      max_column_count = params['max_column_count']
    else:
      max_column_count = False

    #salt can be passed as arbitrary string to ensure privacy of encoding basis
    if 'salt' in params:
      salt = params['salt']
    else:
      salt = ''
      
    #accepts either 'hash' or 'md5', 
    #where hash is quicker since uses native python function instead of hashlib
    if 'hash_alg' in params:
      hash_alg = params['hash_alg']
    else:
      hash_alg = 'hash'

    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_hash', suffixoverlap_results)
      
      mdf_test[column + '_hash'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_hash', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_hash'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_hash'}, inplace = True)
      
    #convert column to string, note this means that missing data converted to 'nan'
    mdf_train[column + '_hash'] = mdf_train[column + '_hash'].astype(str)
    mdf_test[column + '_hash'] = mdf_test[column + '_hash'].astype(str)
    
    #now scrub special characters
    for scrub_punctuation in excluded_characters:
      mdf_train[column + '_hash'] = mdf_train[column + '_hash'].str.replace(scrub_punctuation, '')
      mdf_test[column + '_hash'] = mdf_test[column + '_hash'].str.replace(scrub_punctuation, '')
      
    if hash_alg == 'md5':
      from hashlib import md5
    
    #define some support functions
    def _assemble_wordlist(string):
      """
      converts a string to list of words by splitting words from space characters
      assumes any desired special characters have already been stripped
      """

      wordlist = []
      j = 0
      
      if max_column_count is False:
        for i in range(len(string)+1):
          if i < len(string):
            if string[i] == space:
              if i > 0:
                if string[j] != space:
                  wordlist.append(string[j:i])
              j = i+1

          else:
            if j < len(string):
              if string[j] != space:
                wordlist.append(string[j:i])
      
      #else if we have a cap on number of returned columns
      else:
        wordlist_length = 0
        for i in range(len(string)+1):
          if i < len(string):
            if string[i] == space:
              if i > 0:
                if string[j] != space:
                  wordlist.append(string[j:i])
                  wordlist_length += 1
                  if wordlist_length == max_column_count - 1:
                    j = i+1
                    break
              j = i+1

          else:
            if j < len(string):
              if string[j] != space:
                wordlist.append(string[j:i])
                wordlist_length += 1
                j = i+1
                if wordlist_length == max_column_count - 1:
                  break

        if wordlist_length == max_column_count - 1:
          if j < len(string):
            wordlist.append(string[j:len(string)])

      return wordlist
    
    #now convert entries to lists of words
    #e.g. this converts "Two words" to ['Two', 'words']
    #if you don't want to split words can pass space = ''
    if space != '':
      mdf_train[column + '_hash'] = mdf_train[column + '_hash'].transform(_assemble_wordlist)
      mdf_test[column + '_hash'] = mdf_test[column + '_hash'].transform(_assemble_wordlist)
    else:
      mdf_train[column + '_hash'] = mdf_train[column + '_hash'].transform(lambda x: [x])
      mdf_test[column + '_hash'] = mdf_test[column + '_hash'].transform(lambda x: [x])
      
    #if user didn't specify vocab_size then derive based on heuristic
    if vocab_size is False:
      #now let's derive vocab_size from train set, first convert all entries to a list of lists
      temp_list = pd.Series(mdf_train[column + '_hash']).tolist()
      #this flattens the list of lists
      temp_list = [item for items in temp_list for item in items]
      #consolidate redundant entries
      temp_list = set(temp_list)
      #get length
      vocab_size = len(temp_list)
      del temp_list
      #calculate the vocab_size based on heuristic_multiplier and heuristic_cap
      vocab_size = int(vocab_size * heuristic_multiplier)
      if vocab_size > heuristic_cap:
        vocab_size = int(heuristic_cap)
        
    def _md5_hash(wordlist):
      """
      applies an md5 hashing to the list of words
      this conversion to ingtegers is known as "the hashing trick"
      md5 is partly inspired by tensorflow keras_preprocessing hashing_trick function
      requires importing from hashlib import md5
      here n is the range of integers for vocabulary
      0 is reserved for use to pad lists of shorter length
      """
      if hash_alg == 'md5':
        return [int(md5((salt + word).encode()).hexdigest(), 16) % (vocab_size-1) + 1 for word in wordlist]
      else:
        return [hash(salt + word) % (vocab_size-1) + 1 for word in wordlist]
        
    #now apply hashing to convert to integers based on vocab_size
    mdf_train[column + '_hash'] = mdf_train[column + '_hash'].transform(_md5_hash)
    mdf_test[column + '_hash'] = mdf_test[column + '_hash'].transform(_md5_hash)
    
    #get max length, i.e. for entry with most words
    max_length = mdf_train[column + '_hash'].transform(len).max()
    
    def _pad_hash(hash_list):
      """
      ensures hashing lists are all same length by padding shorter length lists with 0
      """
      padcount = max_length - len(hash_list)
      if padcount >= 0:
        pad = []
        for i in range(padcount):
          pad = pad + [0]
        hash_list = hash_list + pad
      else:
        #for test data we'll trim if max_length greater than max_length from train data
        hash_list = hash_list[:max_length]

      return hash_list
        
    #the other entries are padded out with 0 to reach same length, if a train entry has longer length it is trimmed
    mdf_train[column + '_hash'] = \
    mdf_train[column + '_hash'].transform(_pad_hash)
    mdf_test[column + '_hash'] = \
    mdf_test[column + '_hash'].transform(_pad_hash)
    
    if max_length > 1:

      hashcolumns = []
      for i in range(max_length):

        hash_column = column + '_hash_' + str(i)

        hashcolumns += [hash_column]

        #check for column header overlap
        suffixoverlap_results = \
        self._df_check_suffixoverlap(mdf_train, hash_column, suffixoverlap_results)

        #now populate the column with i'th entry from hashed list
        mdf_train[hash_column] = mdf_train[column + '_hash'].transform(lambda x: x[i])
        mdf_test[hash_column] = mdf_test[column + '_hash'].transform(lambda x: x[i])

      #remove support column
      del mdf_train[column + '_hash']
      del mdf_test[column + '_hash']
      
    else:
      hashcolumns = [column + '_hash']
      
      #now populate the column with i'th entry from hashed list
      mdf_train[column + '_hash'] = mdf_train[column + '_hash'].transform(lambda x: x[0])
      mdf_test[column + '_hash'] = mdf_test[column + '_hash'].transform(lambda x: x[0])

    #returned data type is conditional on the size of encoding space
    for hashcolumn in hashcolumns:

      if vocab_size < 254:
        mdf_train[hashcolumn] = mdf_train[hashcolumn].astype(np.uint8)
        mdf_test[hashcolumn] = mdf_test[hashcolumn].astype(np.uint8)
      elif vocab_size < 65534:
        mdf_train[hashcolumn] = mdf_train[hashcolumn].astype(np.uint16)
        mdf_test[hashcolumn] = mdf_test[hashcolumn].astype(np.uint16)
      else:
        mdf_train[hashcolumn] = mdf_train[hashcolumn].astype(np.uint32)
        mdf_test[hashcolumn] = mdf_test[hashcolumn].astype(np.uint32)
    
    column_dict_list = []

    for hc in hashcolumns:
      
      hashnormalization_dict = {hc : {'hashcolumns' : hashcolumns, \
                                      'col_count' : max_length, \
                                      'vocab_size_hash' : vocab_size, \
                                      'heuristic_multiplier' : heuristic_multiplier, \
                                      'heuristic_cap' : heuristic_cap, \
                                      'max_length' : max_length, \
                                      'excluded_characters' : excluded_characters, \
                                      'space' : space, \
                                      'salt' : salt, \
                                      'max_column_count' : max_column_count, \
                                      'hash_alg' : hash_alg}}
      
      column_dict = { hc : {'category' : 'hash', \
                           'origcategory' : category, \
                           'normalization_dict' : hashnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : hashcolumns, \
                           'categorylist' : hashcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    return mdf_train, mdf_test, column_dict_list

  def _process_hs10(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #applies the "hashing trick" to encode categoric sets
    #returning a set of columns binary encoded corresponding to integers returned from hash
    #this is intended for sets with very high cardinality
    #note that the same activation set may be returned for different entries
    #works by hashing each entry with hashlib md5 hashing algorithm
    #which is converted to integer and taken remainder from a division by vocab_size
    #where vocab_size is passed parameter intended to align with vocabulary size defaulting to 128
    #note that if vocab_size is not large enough some of words may be returned with encoding overlap
    #returns set of columns with suffix appenders '_hs10_#' where # is integer
    #uppercase conversion if desired is performed externally by the UPCS transform
    #applies a heuristic to
    #set a vocab_size based on number unique entries times heuristic_multiplier parameter which defaults to 2
    #also accepts heuristic_cap parameter where if unique * heuristic_muyltipler > heuristic_cap
    #then vocab_size = heuristic_cap
    #or user can manually specify a vocab_size instead of relying on heuristic
    """
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    if 'vocab_size' in params:
      vocab_size = params['vocab_size']
    else:
      vocab_size = False
      
    if 'heuristic_multiplier' in params:
      heuristic_multiplier = params['heuristic_multiplier']
    else:
      heuristic_multiplier = 2
      
    if 'heuristic_cap' in params:
      heuristic_cap = params['heuristic_cap']
    else:
      heuristic_cap = 1024

    #salt can be passed as arbitrary string to ensure privacy of encoding basis
    if 'salt' in params:
      salt = params['salt']
    else:
      salt = ''

    #a list of strings that are scrubbed from entries e.g. punctuations
    if 'excluded_characters' in params:
      excluded_characters = params['excluded_characters']
    else:
      excluded_characters = []
      
    #accepts either 'hash' or 'md5', 
    #where hash is quicker since uses native python function instead of hashlib 
    #(md5 was the original basis, hash is new default)
    if 'hash_alg' in params:
      hash_alg = params['hash_alg']
    else:
      hash_alg = 'hash'

    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, column + '_hs10', suffixoverlap_results)
      
      mdf_test[column + '_hs10'] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, column + '_hs10', suffixoverlap_results)
      
      mdf_train.rename(columns = {column : column + '_hs10'}, inplace = True)
      mdf_test.rename(columns = {column : column + '_hs10'}, inplace = True)
      
    #convert column to string, note this means that missing data converted to 'nan'
    mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].astype(str)
    mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].astype(str)

    #now scrub special characters
    for scrub_punctuation in excluded_characters:
      mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].str.replace(scrub_punctuation, '')
      mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].str.replace(scrub_punctuation, '')
    
    if vocab_size is False:
      #calculate the vocab_size based on heuristic_multiplier and heuristic_cap
      vocab_size = int(mdf_train[column + '_hs10'].nunique() * heuristic_multiplier)
      if vocab_size > heuristic_cap:
        vocab_size = int(heuristic_cap)
      
    if hash_alg == 'md5':
      from hashlib import md5
    
    def _md5_hash(entry):
      """
      applies hashing to the list of words
      this conversion to ingtegers is known as "the hashing trick"
      requires importing from hashlib import md5 if hash_alg = "md5"
      here n is the range of integers for vocabulary
      """
      if hash_alg == 'md5':
        return int(md5((salt + entry).encode()).hexdigest(), 16) % (vocab_size)
      else:
        return hash(salt + entry) % (vocab_size)

    #now apply hashing to convert to integers based on vocab_size
    mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].transform(_md5_hash)
    mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].transform(_md5_hash)

    binary_column_count = int(np.ceil(np.log2(vocab_size)))
    
    #convert integer encoding to binary
    mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].transform(bin)
    mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].transform(bin)
    
    #convert format to string of digits
    mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].str[2:]
    mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].str[2:]
    
    #pad out zeros
    mdf_train[column + '_hs10'] = mdf_train[column + '_hs10'].str.zfill(binary_column_count)
    mdf_test[column + '_hs10'] = mdf_test[column + '_hs10'].str.zfill(binary_column_count)
    
    hashcolumns = []
    for i in range(binary_column_count):

      hash_column = column + '_hs10_' + str(i)
      
      hashcolumns += [hash_column]
      
      #check for column header overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, hash_column, suffixoverlap_results)
      
      #now populate the column with i'th entry from hashed list
      mdf_train[hash_column] = mdf_train[column + '_hs10'].str[i].astype(np.int8)
      mdf_test[hash_column] = mdf_test[column + '_hs10'].str[i].astype(np.int8)
    
    #remove support column
    del mdf_train[column + '_hs10']
    del mdf_test[column + '_hs10']
    
    column_dict_list = []

    for hc in hashcolumns:
      
      hashnormalization_dict = {hc : {'hashcolumns' : hashcolumns, \
                                      'col_count' : binary_column_count, \
                                      'vocab_size' : vocab_size, \
                                      'heuristic_multiplier' : heuristic_multiplier, \
                                      'heuristic_cap' : heuristic_cap, \
                                      'salt' : salt, \
                                      'excluded_characters' : excluded_characters, \
                                      'hash_alg' : hash_alg}}      
      
      column_dict = { hc : {'category' : 'hs10', \
                           'origcategory' : category, \
                           'normalization_dict' : hashnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : hashcolumns, \
                           'categorylist' : hashcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    return mdf_train, mdf_test, column_dict_list

  def _process_srch(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_srch(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #string parses unique entries to identify overlaps with search strings
    #when overlap found returns a column with boolean activation identifiers
    
    #note this differs from original srch in that makes use of pandas str.contains
    #which is expected to be more efficient for unbounded sets
    
    #for example, if a categoical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'
    
    #note that search parameter can include lists of search terms embedded in the list
    #which embedded lists will be aggregated to a single activation
    #for example if we want single activation for female names could pass search = [['Ms.', 'Miss', 'Mrs']] etc
    
    #note this returns all zeros in a column if search value not found
    
    #note returned coluymns are named by search term, e
    #e.g. column + '_srch_' + str(search)
    
    #note that search terms are converted to strings and compared to columns cast as strings

    #missing values are ignored by default
    """
    
    suffixoverlap_results = {}
        
    if 'search' in params:
      search = params['search']
    else:
      search = []
      
    if 'case' in params:
      case = params['case']
    else:
      case = True
      
    #we'll create mirror to account for any embdded lists of search terms for aggregation
    search_preflattening = search.copy()
    #this is kind of hacky just to reuse code below resetting this list to repopulate
    search = []
    aggregated_dict = {}
    
    for entry in search_preflattening:
      if type(entry) != type([]):
        search.append(str(entry))
      else:
        aggregated_dict.update({str(entry[-1]):[]})
        for entry2 in entry[0:-1]:
          search.append(entry2)
          aggregated_dict[str(entry[-1])].append(str(entry2))
        for entry2 in entry[-1:]:
          search.append(entry2)
    
    newcolumns = []
    search_dict = {}
    for searchitem in search:
      search_dict.update({column + '_srch_' + str(searchitem) : str(searchitem)})
      
    for newcolumn in search_dict:
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, newcolumn, suffixoverlap_results)
      
      mdf_train[newcolumn] = \
      np.where(mdf_train[column].astype(str).str.contains(search_dict[newcolumn], case=case, regex=False), 1, 0)
      
      mdf_test[newcolumn] = \
      np.where(mdf_test[column].astype(str).str.contains(search_dict[newcolumn], case=case, regex=False), 1, 0)
    
    newcolumns = list(search_dict)
    
    #now we'll address any aggregations fo search terms
    #from search parameter passed with embedded list of search terms
          
    #then after populating activations, we'll put this below
    #inverse_search_dict has key of search term and value of column for activations
    inverse_search_dict = {value:key for key,value in search_dict.items()}
    newcolumns_before_aggregation = newcolumns.copy()
    
    #now we consolidate activations
    #note that this only runs when aggregated_dict was populated with an embedded list of search terms
    for aggregated_dict_key in aggregated_dict:
      aggregated_dict_key_column = inverse_search_dict[aggregated_dict_key]
      
      for target_for_aggregation in aggregated_dict[aggregated_dict_key]:
        target_for_aggregation_column = inverse_search_dict[target_for_aggregation]
        
        mdf_train[aggregated_dict_key_column] = \
        np.where(mdf_train[target_for_aggregation_column] == 1, 1, mdf_train[aggregated_dict_key_column])
        mdf_test[aggregated_dict_key_column] = \
        np.where(mdf_test[target_for_aggregation_column] == 1, 1, mdf_test[aggregated_dict_key_column])
        
        del mdf_train[target_for_aggregation_column]
        del mdf_test[target_for_aggregation_column]
        
        newcolumns.remove(target_for_aggregation_column)
    
    for newcolumn in newcolumns:

      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'search_dict' : search_dict, \
                                      'inverse_search_dict' : inverse_search_dict, \
                                      'srch_newcolumns_srch'   : newcolumns, \
                                      'newcolumns_before_aggregation' : newcolumns_before_aggregation, \
                                      'search' : search, \
                                      'search_preflattening' : search_preflattening, \
                                      'aggregated_dict' : aggregated_dict, \
                                      'case' : case}}
      
      column_dict = {tc : {'category' : 'srch', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_src2(self, mdf_train, mdf_test, column, category, \
                        postprocess_dict, params = {}):
    """
    #process_src2(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #string parses unique entries to identify overlaps with search strings
    #when overlap found returns a column with boolean activation identifiers
    
    #for example, if a categoical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'

    #missing values are ignored by default
    
    #assumes that unique values of test set are same or subset of train set
    #for more efficient application in postmunge
    """
    
    suffixoverlap_results = {}
        
    if 'search' in params:
      search = params['search']
    else:
      search = []
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
#     maxlength = max(len(x) for x in unique_list)
    
#     overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    #we'll create mirror to account for any embdded lists of search terms for aggregation
    search_preflattening = search.copy()
    #this is kind of hacky just to reuse code below resetting this list to repopulate
    search = []
    aggregated_dict = {}
    
    for entry in search_preflattening:
      if type(entry) != type([]):
        search.append(str(entry))
      else:
        aggregated_dict.update({str(entry[-1]):[]})
        for entry2 in entry[0:-1]:
          search.append(entry2)
          aggregated_dict[str(entry[-1])].append(str(entry2))
        for entry2 in entry[-1:]:
          search.append(entry2)

    #we'll populate overlap_dict as
    #{search_string : [list of associate categories with that overlap found]}
    
    overlap_dict = {}
    
    for search_string in search:
      
      overlap_dict.update({search_string : []})
    
    for search_string in search:
      
      len_search_string = len(search_string)
    
      for unique in unique_list:
        
        len_unique = len(unique)
        
        if len_unique >= len_search_string:
          
          nbr_iterations = len_unique - len_search_string
          
          for i in range(nbr_iterations + 1):
            
            extract = unique[i:(len_search_string+i)]
            
            if extract in search:
              
              overlap_dict[extract].append(unique)
    
#     #now for mdf_test
    
#     unique_list_test = list(mdf_test[column].unique())

#     unique_list_test = list(map(str, unique_list_test))

#     test_overlap_dict = {}
    
#     for search_string in search:
      
#       test_overlap_dict.update({search_string : []})
    

#     train_keys = list(overlap_dict)

#     train_keys.sort(key = len, reverse=True)

#     for dict_key in train_keys:

#       for unique_test in unique_list_test:

#         len_key = len(dict_key)

#         if len(unique_test) >= len_key:

#           nbr_iterations4 = len(unique_test) - len_key

#           for l in range(nbr_iterations4 + 1):

#             extract4 = unique_test[l:(len_key+l)]

#             if extract4 == dict_key:

#               test_overlap_dict[dict_key].append(unique_test)
    
    newcolumns = []

    for dict_key in overlap_dict:
      
      if len(overlap_dict[dict_key]) > 0:

        newcolumn = column + '_src2_' + dict_key

        mdf_train, suffixoverlap_results = \
        self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
        
        mdf_test[newcolumn] = mdf_test[column].copy()

        mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
        mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

        mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
#         mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)

        mdf_test[newcolumn] = mdf_test[newcolumn].isin(overlap_dict[dict_key])
#         mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

        newcolumns.append(newcolumn)
    
    #now in case there are any aggregated activations, inspired by approach in srch
    inverse_search_dict = dict(zip(search, newcolumns))
    newcolumns_before_aggregation = newcolumns.copy()
    
    #now we consolidate activations
    #note that this only runs when aggregated_dict was populated with an embedded list of search terms
    for aggregated_dict_key in aggregated_dict:
      aggregated_dict_key_column = inverse_search_dict[aggregated_dict_key]
      
      for target_for_aggregation in aggregated_dict[aggregated_dict_key]:
        target_for_aggregation_column = inverse_search_dict[target_for_aggregation]
        
        mdf_train[aggregated_dict_key_column] = \
        np.where(mdf_train[target_for_aggregation_column] == 1, 1, mdf_train[aggregated_dict_key_column])
        mdf_test[aggregated_dict_key_column] = \
        np.where(mdf_test[target_for_aggregation_column] == 1, 1, mdf_test[aggregated_dict_key_column])
        
        del mdf_train[target_for_aggregation_column]
        del mdf_test[target_for_aggregation_column]
        
        newcolumns.remove(target_for_aggregation_column)
        
    for newcolumn in newcolumns:
      mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)
      mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'overlap_dict' : overlap_dict, \
                                      'src2_newcolumns_src2'   : newcolumns, \
                                      'newcolumns_before_aggregation' : newcolumns_before_aggregation, \
                                      'search' : search, \
                                      'inverse_search_dict' : inverse_search_dict, \
                                      'aggregated_dict' : aggregated_dict, \
                                      'search_preflattening' : search_preflattening}}
      
      column_dict = {tc : {'category' : 'src2', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_src3(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_src3(mdf_train, mdf_test, column, category)
    #preprocess column with categorical entries as strings
    #relies on user passed list of strings in search parameter
    #string parses unique entries to identify overlaps with search strings
    #when overlap found returns a column with boolean activation identifiers
    
    #for example, if a categoical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then a new column would be returned 
    #with activations corresponding to entries of 'west' and 'northwest'

    #missing values are ignored by default
    
    #where srch is preferred for unbounded range of unique values
    
    #and src2 preferred when have bounded range of unique values for both train & test
    
    #and speculation is that src3 may be preferred when have a bounded
    #range of unique values but still want capacity to handle values in 
    #test set not found in train set
    """
    
    suffixoverlap_results = {}
        
    if 'search' in params:
      search = params['search']
    else:
      search = []
    
    #first we find overlaps from mdf_train
    
    unique_list = list(mdf_train[column].unique())

    unique_list = list(map(str, unique_list))
    
#     maxlength = max(len(x) for x in unique_list)
    
#     overlap_lengths = list(range(maxlength - 1, minsplit, -1))

    #we'll populate overlap_dict as
    #{search_string : [list of associate categories with that overlap found]}
    
    overlap_dict = {}
    
    for search_string in search:
      
      overlap_dict.update({search_string : []})
    
    for search_string in search:
      
      len_search_string = len(search_string)
    
      for unique in unique_list:
        
        len_unique = len(unique)
        
        if len_unique >= len_search_string:
          
          nbr_iterations = len_unique - len_search_string
          
          for i in range(nbr_iterations + 1):
            
            extract = unique[i:(len_search_string+i)]
            
            if extract in search:
              
              overlap_dict[extract].append(unique)
           
    #now for mdf_test
    
    unique_list_test = list(mdf_test[column].unique())

    unique_list_test = list(map(str, unique_list_test))

    test_overlap_dict = {}
    
    for search_string in search:
      
      test_overlap_dict.update({search_string : []})
    
    train_keys = list(overlap_dict)

    train_keys.sort(key = len, reverse=True)

    for dict_key in train_keys:

      for unique_test in unique_list_test:

        len_key = len(dict_key)

        if len(unique_test) >= len_key:

          nbr_iterations4 = len(unique_test) - len_key

          for l in range(nbr_iterations4 + 1):

            extract4 = unique_test[l:(len_key+l)]

            if extract4 == dict_key:

              test_overlap_dict[dict_key].append(unique_test)
    
    newcolumns = []

    for dict_key in overlap_dict:
      
      if len(overlap_dict[dict_key]) > 0:

        newcolumn = column + '_src3_' + dict_key

        mdf_train, suffixoverlap_results = \
        self._df_copy_train(mdf_train, column, newcolumn, suffixoverlap_results)
        
        mdf_test[newcolumn] = mdf_test[column].copy()

        mdf_train[newcolumn] = mdf_train[newcolumn].astype(str)
        mdf_test[newcolumn] = mdf_test[newcolumn].astype(str)

        mdf_train[newcolumn] = mdf_train[newcolumn].isin(overlap_dict[dict_key])
        mdf_train[newcolumn] = mdf_train[newcolumn].astype(np.int8)

        mdf_test[newcolumn] = mdf_test[newcolumn].isin(test_overlap_dict[dict_key])
        mdf_test[newcolumn] = mdf_test[newcolumn].astype(np.int8)

        newcolumns.append(newcolumn)
    
    column_dict_list = []

    for tc in newcolumns:

      textnormalization_dict = {tc : {'overlap_dict' : overlap_dict, \
                                      'srch_newcolumns_src3'   : newcolumns, \
                                      'search' : search}}
      
      column_dict = {tc : {'category' : 'src3', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : newcolumns, \
                           'categorylist' : newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    if len(newcolumns) == 0:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_src4(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_src4(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #search transform for a column with categorical entries treated as strings
    #relies on a user passed list of search terms in the 'search' parameter
    #entries are cast to str and tested with pandas str.contains (regex=False)
    #the 'case' parameter (default True) is passed through to str.contains
    
    #for example, if a categorical set consisted of unique values 
    #['west', 'north', 'northwest']
    #and a user passed the search parameter as ['west']
    #then entries of 'west' and 'northwest' would both receive the activation
    
    #src4 consolidates the activations into a single ordinal encoded column
    #returned as column + '_src4'
    #with 0 reserved for entries without any activation
    #and search terms numbered 1, 2, ... in the order of the flattened search list
    #note that if an entry matches multiple search terms
    #the term latest in the search list dictates the final encoding
    
    #search terms may be aggregated by passing an embedded list in search
    #e.g. search = ['a', ['b', 'c', 'd']] encodes matches of 'b' and 'c' 
    #with the encoding of 'd' (the final entry of the embedded list)
    #empty embedded lists are ignored
    
    #note that the column is cast with astype(str) before searching
    #so the string representation of a missing value is what gets searched
    
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    search = params.get('search', [])
    case = params.get('case', True)
      
    #retain the search parameter as received, embedded lists designate aggregations
    search_preflattening = search.copy()
    
    #search is repopulated as a flattened list of terms
    #aggregated_dict maps {aggregation target term : [terms consolidated into it]}
    search = []
    aggregated_dict = {}
    
    for entry in search_preflattening:
      if not isinstance(entry, list):
        search.append(str(entry))
      elif entry:
        #the final entry of an embedded list is the aggregation target
        aggregated_key = str(entry[-1])
        aggregated_dict.update({aggregated_key : []})
        for entry2 in entry[:-1]:
          search.append(entry2)
          aggregated_dict[aggregated_key].append(str(entry2))
        search.append(entry[-1])
    
    #search_dict maps {per-term column header : search term as str}
    search_dict = {}
    for searchitem in search:
      search_dict.update({column + '_src4_' + str(searchitem) : str(searchitem)})
      
    newcolumns = list(search_dict)
    
    #the per-term headers are no longer materialized as dataframe columns
    #(which previously overwrote and then deleted any same named column)
    #the overlap check is retained since the headers are still recorded in normalization_dict
    for newcolumn in newcolumns:
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, newcolumn, suffixoverlap_results)
      
    #ordinal encodings, zero is reserved for no activations
    ordl_dict1 = {}
    ordl_dict2 = {}
    for encoding, newcolumn in enumerate(newcolumns, start=1):
      ordl_dict1.update({encoding : newcolumn})
      ordl_dict2.update({newcolumn : encoding})
      
    src4_column = column + '_src4'
      
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, src4_column, suffixoverlap_results)
    
    #string cast is performed once instead of once per search term
    train_strings = mdf_train[column].astype(str)
    test_strings = mdf_test[column].astype(str)
    
    train_codes = np.zeros(len(mdf_train), dtype=np.int64)
    test_codes = np.zeros(len(mdf_test), dtype=np.int64)
    
    #later search terms overwrite activations of earlier search terms
    for newcolumn in newcolumns:
      
      searchterm = search_dict[newcolumn]
      encoding = ordl_dict2[newcolumn]
      
      train_mask = \
      np.asarray(train_strings.str.contains(searchterm, case=case, regex=False), dtype=bool)
      test_mask = \
      np.asarray(test_strings.str.contains(searchterm, case=case, regex=False), dtype=bool)
      
      train_codes = np.where(train_mask, encoding, train_codes)
      test_codes = np.where(test_mask, encoding, test_codes)
      
    #inverse_search_dict has key of search term and value of per-term column header
    inverse_search_dict = {value:key for key,value in search_dict.items()}
      
    #now we consolidate activations
    #note that this only runs when search was passed with an embedded list of search terms
    for aggregated_dict_key in aggregated_dict:
      aggregated_dict_key_encoding = ordl_dict2[inverse_search_dict[aggregated_dict_key]]
      
      for target_for_aggregation in aggregated_dict[aggregated_dict_key]:
        target_for_aggregation_encoding = ordl_dict2[inverse_search_dict[target_for_aggregation]]
        
        train_codes = \
        np.where(train_codes == target_for_aggregation_encoding, aggregated_dict_key_encoding, train_codes)
        test_codes = \
        np.where(test_codes == target_for_aggregation_encoding, aggregated_dict_key_encoding, test_codes)

    #we'll base the integer type on number of ordinal entries
    if len(ordl_dict1) < 254:
      ordinal_dtype = np.uint8
    elif len(ordl_dict1) < 65534:
      ordinal_dtype = np.uint16
    else:
      ordinal_dtype = np.uint32
      
    mdf_train[src4_column] = train_codes.astype(ordinal_dtype)
    mdf_test[src4_column] = test_codes.astype(ordinal_dtype)
    
    column_dict_list = []
    
    #newcolumns are the per-term headers in the convention of the srch transform
    #src4_newcolumns are after consolidating to ordinal encoding (single entry)
    src4_newcolumns = [src4_column]

    for tc in src4_newcolumns:

      textnormalization_dict = {tc : {'search_dict' : search_dict, \
                                      'inverse_search_dict' : inverse_search_dict, \
                                      'srch_newcolumns_src4' : newcolumns, \
                                      'src4_newcolumns' : src4_newcolumns, \
                                      'search' : search, \
                                      'search_preflattening' : search_preflattening, \
                                      'aggregated_dict' : aggregated_dict, \
                                      'case' : case, \
                                      'ordl_dict1' : ordl_dict1, \
                                      'activations_list' : list(ordl_dict1), \
                                      'ordl_dict2' : ordl_dict2}}
      
      column_dict = {tc : {'category' : 'src4', \
                           'origcategory' : category, \
                           'normalization_dict' : textnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : src4_newcolumns, \
                           'categorylist' : src4_newcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def _process_aggt(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_aggt(df, column, category, postprocess_dict, params)
    #copies column into column + '_aggt'
    #and consolidates differently spelled duplicates into a single representation
    #based on user passed parameter 'aggregate'
    #which is a list of lists, where each sublist is an aggregation group
    #whose entries are replaced with the final item of that sublist
    #aggregate may also be passed as a single flat list of terms
    #in which case the entire list is treated as one aggregation group
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    aggregate = params['aggregate'] if 'aggregate' in params else [[]]
    
    aggt_column = column + '_aggt'
      
    df, suffixoverlap_results = \
    self._df_copy_train(df, column, aggt_column, suffixoverlap_results)
    
    def _consolidate(group):
      #replaces each entry of group other than the last with the last entry
      for position in range(len(group) - 1):
        df[aggt_column] = \
        np.where(df[aggt_column] == group[position], group[-1], df[aggt_column])

    for sublist in aggregate:
      
      if isinstance(sublist, list):
        
        _consolidate(sublist)
        
      else:
        
        #a non-list entry signals aggregate was passed as a flat list
        #so the full list is applied once as a single group
        _consolidate(aggregate)
        
        break

    normalization_dict = {aggt_column : {'aggregate' : aggregate}}
    
    nmbrcolumns = [aggt_column]
    
    #populate the column_dict entry for each returned column
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_entry = {'category' : 'aggt', \
                      'origcategory' : category, \
                      'normalization_dict' : normalization_dict, \
                      'origcolumn' : column, \
                      'inputcolumn' : column, \
                      'columnslist' : nmbrcolumns, \
                      'categorylist' : nmbrcolumns, \
                      'infillmodel' : False, \
                      'infillcomplete' : False, \
                      'suffixoverlap_results' : suffixoverlap_results, \
                      'deletecolumn' : False}

      column_dict_list.append({nc : column_entry})
        
    return df, column_dict_list

  def _process_strn(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_strn(df, column, category, postprocess_dict, params)
    #parses the str representation of each unique entry 
    #and returns the longest substring in which no single character 
    #is recognized as a number by self._is_number
    #when multiple substrings share the longest length the leftmost is returned
    #entries without any such substring are populated with 'zzzinfill'
    #the result is returned in column + '_strn'
    #an empty column is supported (returns an empty overlap_dict)
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    strn_column = column + '_strn'
    
    unique_list = list(map(str, df[column].unique()))
    
    #default of 0 avoids a ValueError from max() when the column has no entries
    maxlength = max((len(x) for x in unique_list), default=0)

    #overlap_dict maps {unique entry as str : extracted substring or np.nan}
    overlap_dict = {}
    
    #windows are evaluated from longest to shortest 
    #so the first extract recorded for an entry is its longest
    for overlap_length in range(maxlength, 0, -1):

      for unique in unique_list:
        
        if unique in overlap_dict or len(unique) < overlap_length:
          continue

        for i in range(len(unique) - overlap_length + 1):

          extract = unique[i:(overlap_length+i)]
          
          if not any(self._is_number(character) for character in extract):
            
            overlap_dict.update({unique : extract})
            break
            
        else:
          
          #after exhausting single character windows the entry has no valid extract
          if overlap_length == 1:

            overlap_dict.update({unique : np.nan})
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, strn_column, suffixoverlap_results)
    
    df[strn_column] = df[column].astype(str)
    
    if overlap_dict:
      df[strn_column] = df[strn_column].replace(overlap_dict)

    #entries without a valid extract receive the string infill plug value
    df[strn_column] = df[strn_column].fillna('zzzinfill')
    
    #create list of columns
    nmbrcolumns = [strn_column]

    nmbrnormalization_dict = {strn_column : {'overlap_dict' : overlap_dict}}

    #populate the column_dict entry for each returned column
    column_dict_list = []
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'strn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def _process_strg(self, df, column, category, postprocess_dict, params = {}):
    '''
    #process_strg(df, column, category, postprocess_dict, params)
    #copies column into column + '_strg' and casts the entries to str
    #intended for integer categoric sets, such as from an ordinal transform
    #for purposes of categoric recognition in some downstream libraries
    #(eg some libraries will treat integer label sets as targets for regression instead of classification)
    #does not perform infill, just converts entries to string
    #returns df, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    strg_column = column + '_strg'
    
    df, suffixoverlap_results = \
    self._df_copy_train(df, column, strg_column, suffixoverlap_results)
    
    df[strg_column] = df[strg_column].astype(str)
    
    #this transform has no properties to record in the normalization_dict
    strg_entry = {'category' : 'strg', \
                  'origcategory' : category, \
                  'normalization_dict' : {strg_column:{}}, \
                  'origcolumn' : column, \
                  'inputcolumn' : column, \
                  'columnslist' : [strg_column], \
                  'categorylist' : [strg_column], \
                  'infillmodel' : False, \
                  'infillcomplete' : False, \
                  'suffixoverlap_results' : suffixoverlap_results, \
                  'deletecolumn' : False}
    
    column_dict_list = [{strg_column : strg_entry}]

    return df, column_dict_list

  def _process_nmrc(self, df, column, category, postprocess_dict, params = {}):
    """
    #process_nmrc(df, column, category, postprocess_dict, params)
    #parses the str representation of each unique entry 
    #and returns the longest substring recognized as a number (leftmost when tied)
    #entries without a number are subject to infill with the mean of the returned column
    #(or 0 when that mean is NaN)
    #accepts parameters 
    #'convention' as 'numbers'/'commas'/'spaces' (default 'numbers')
    #which selects between self._is_number / self._is_number_comma / self._is_number_EU
    #for recognizing substrings longer than one character
    #'suffix' for column suffix identifier (default '_nmrc')
    #an empty column is supported (returns an empty overlap_dict)
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    #accepts numbers/commas/spaces
    convention = params.get('convention', 'numbers')
    
    #accepts string for suffix appender
    suffix = params.get('suffix', '_nmrc')
      
    nmrc_column = column + suffix
    
    df, suffixoverlap_results = \
    self._df_copy_train(df, column, nmrc_column, suffixoverlap_results)
    
    unique_list = list(map(str, df[nmrc_column].unique()))
    
    #default of 0 avoids a ValueError from max() when the column has no entries
    maxlength = max((len(x) for x in unique_list), default=0)
    
    def _parse_extract(extract, overlap_length):
      #returns the float parsed from extract, or None when extract is not recognized
      #single character extracts are always evaluated with the 'numbers' convention
      if overlap_length == 1 or convention == 'numbers':
        if self._is_number(extract):
          return float(extract)
      elif convention == 'commas':
        if self._is_number_comma(extract):
          return float(extract.replace(',',''))
      elif convention == 'spaces':
        if self._is_number_EU(extract):
          #interior spaces and periods are stripped, comma converted to decimal point
          return float(extract[0] + extract[1:-1].replace(' ','').replace('.','').replace(',','.') + extract[-1])
      return None

    #overlap_dict maps {unique entry as str : parsed float or np.nan}
    overlap_dict = {}
    
    #windows are evaluated from longest to shortest 
    #so the first number recorded for an entry is its longest
    for overlap_length in range(maxlength, 0, -1):

      for unique in unique_list:
        
        if unique in overlap_dict or len(unique) < overlap_length:
          continue

        for i in range(len(unique) - overlap_length + 1):

          parsed = _parse_extract(unique[i:(overlap_length+i)], overlap_length)
          
          if parsed is not None:

            overlap_dict.update({unique : parsed})
            break
            
        else:
          
          #after exhausting single character windows the entry has no number
          if overlap_length == 1:

            overlap_dict.update({unique : np.nan})
    
    df[nmrc_column] = df[nmrc_column].astype(str)
    
    if overlap_dict:
      df[nmrc_column] = df[nmrc_column].replace(overlap_dict)

    df[nmrc_column] = pd.to_numeric(df[nmrc_column], errors='coerce')
    
    #get mean of the returned column, (mean != mean) detects NaN
    mean = df[nmrc_column].mean()
    if mean != mean:
      mean = 0
      
    #replace missing data with the mean as default infill
    df[nmrc_column] = df[nmrc_column].fillna(mean)
    
    #a few more metrics collected for driftreport, evaluated after infill
    maximum = df[nmrc_column].max()
    minimum = df[nmrc_column].min()
    
    #create list of columns
    nmbrcolumns = [nmrc_column]

    #populate data structures
    nmbrnormalization_dict = {nmrc_column : {'overlap_dict' : overlap_dict, \
                                            'mean' : mean, \
                                            'maximum' : maximum, \
                                            'minimum' : minimum, \
                                            'convention' : convention, \
                                            'suffix' : suffix }}
    
    column_dict_list = []
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'nmrc', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return df, column_dict_list

  def _process_nmr4(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #process_nmr4(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #extracts numeric partitions from categoric entries, test treated differently than train
    #for each unique entry returns the longest substring recognized as a number (leftmost when tied)
    #entries without a number are subject to infill with the train set mean
    #(or 0 when that mean is NaN)
    #accepts parameters
    #'convention' as 'numbers'/'commas'/'spaces' (default 'numbers')
    #which selects between self._is_number / self._is_number_comma / self._is_number_EU
    #for recognizing substrings longer than one character
    #'suffix' for column suffix identifier (default '_nmr4')
    #'test_same_as_train' as True/False (default True)
    #where True copies overlap_dict from train for test
    #and test entries not found in train are subject to infill
    #False parses test entries not found in train
    #empty train or test columns are supported
    #returns mdf_train, mdf_test, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    #accepts numbers/commas/spaces
    convention = params.get('convention', 'numbers')
    
    #accepts string for suffix appender
    suffix = params.get('suffix', '_nmr4')
    
    #accepts boolean
    test_same_as_train = params.get('test_same_as_train', True)
      
    nmrc_column = column + suffix
    
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, nmrc_column, suffixoverlap_results)
    
    mdf_test[nmrc_column] = mdf_test[column].copy()
    
    def _parse_extract(extract, overlap_length):
      #returns the float parsed from extract, or None when extract is not recognized
      #single character extracts are always evaluated with the 'numbers' convention
      if overlap_length == 1 or convention == 'numbers':
        if self._is_number(extract):
          return float(extract)
      elif convention == 'commas':
        if self._is_number_comma(extract):
          return float(extract.replace(',',''))
      elif convention == 'spaces':
        if self._is_number_EU(extract):
          #interior spaces and periods are stripped, comma converted to decimal point
          return float(extract[0] + extract[1:-1].replace(' ','').replace('.','').replace(',','.') + extract[-1])
      return None
    
    def _populate_overlaps(uniques, target_dict, longest):
      #adds to target_dict an entry {unique : parsed float or np.nan}
      #for each of uniques not already present as a key
      #windows are evaluated from longest to shortest 
      #so the first number recorded for an entry is its longest
      for overlap_length in range(longest, 0, -1):

        for unique in uniques:
          
          if unique in target_dict or len(unique) < overlap_length:
            continue

          for i in range(len(unique) - overlap_length + 1):

            parsed = _parse_extract(unique[i:(overlap_length+i)], overlap_length)
            
            if parsed is not None:

              target_dict.update({unique : parsed})
              break
              
          else:
            
            #after exhausting single character windows the entry has no number
            if overlap_length == 1:

              target_dict.update({unique : np.nan})
    
    #begin parsing train set
    
    unique_list = list(map(str, mdf_train[nmrc_column].unique()))
    
    #default of 0 avoids a ValueError from max() when the column has no entries
    maxlength = max((len(x) for x in unique_list), default=0)

    overlap_dict = {}
    
    _populate_overlaps(unique_list, overlap_dict, maxlength)
                
    mdf_train[nmrc_column] = mdf_train[nmrc_column].astype(str)
    
    if overlap_dict:
      mdf_train[nmrc_column] = mdf_train[nmrc_column].replace(overlap_dict)
    
    #now test set
    test_unique_list = list(map(str, mdf_test[nmrc_column].unique()))
    extra_test_unique = list(set(test_unique_list) - set(unique_list))

    #test_overlap_dict starts from the train basis, overlap_dict is not edited
    test_overlap_dict = deepcopy(overlap_dict)
    
    if test_same_as_train is True:
      
      #test entries not found in train are subject to infill
      for test_unique in extra_test_unique:
        test_overlap_dict.update({str(test_unique) : np.nan})
      
    elif test_same_as_train is False:
      
      #the window lengths are bounded by the entries being parsed
      #(basing this on the train unique_list truncated the numbers extracted 
      #from test entries longer than every train entry)
      #NOTE(review): any postmunge counterpart that parses new test entries
      #should apply the same bound for consistency -- confirm
      testmaxlength = max((len(x) for x in extra_test_unique), default=0)
      
      _populate_overlaps(extra_test_unique, test_overlap_dict, testmaxlength)
    
    #great now that test_overlap_dict is populated
    mdf_test[nmrc_column] = mdf_test[nmrc_column].astype(str)
    
    if test_overlap_dict:
      mdf_test[nmrc_column] = mdf_test[nmrc_column].replace(test_overlap_dict)

    mdf_train[nmrc_column] = pd.to_numeric(mdf_train[nmrc_column], errors='coerce')
    mdf_test[nmrc_column] = pd.to_numeric(mdf_test[nmrc_column], errors='coerce')

    #get mean of training data, (mean != mean) detects NaN
    mean = mdf_train[nmrc_column].mean()
    if mean != mean:
      mean = 0

    #replace missing data with training set mean as default infill
    mdf_train[nmrc_column] = mdf_train[nmrc_column].fillna(mean)
    mdf_test[nmrc_column] = mdf_test[nmrc_column].fillna(mean)
    
    #a few more metrics collected for driftreport, evaluated on train after infill
    maximum = mdf_train[nmrc_column].max()
    minimum = mdf_train[nmrc_column].min()
    
    #create list of columns
    nmbrcolumns = [nmrc_column]
    
    #populate data structures
    nmbrnormalization_dict = {nmrc_column : {'overlap_dict' : overlap_dict, \
                                            'mean' : mean, \
                                            'maximum' : maximum, \
                                            'minimum' : minimum, \
                                            'unique_list' : unique_list, \
                                            'maxlength' : maxlength, \
                                            'convention' : convention, \
                                            'suffix' : suffix, \
                                            'test_same_as_train' : test_same_as_train}}
    
    column_dict_list = []
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'nmr4', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
      
    return mdf_train, mdf_test, column_dict_list
  
  def _process_ordl(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ordl(mdf_train, mdf_test, column, category)
    #ordinal (sequential integer) encoding of a categoric column
    #the encoded column is returned with suffix appender '_ordl'
    #
    #integers are assigned to the train set entries sorted by their string representation
    #the exception is when ordered_overide is active and both the train and test
    #columns are received as pandas ordered categoricals,
    #in which case the order of the train set categories is the encoding basis
    #
    #missing entries are represented by the plug value 'zzzinfill'
    #test set entries that weren't found in the train set also receive this plug value
    #
    #accepts parameters
    #inplace: boolean defaulting to False, when True the source column is renamed instead of copied
    #ordered_overide: boolean defaulting to True, described above
    #str_convert: boolean defaulting to False, when True entries are converted to strings
    #so that numbers and their string equivalents share an encoding (eg 2 == '2'),
    #when False they are encoded seperately (eg 2 != '2')
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #parameters fall back to these defaults when not found in params
    inplace = params.get('inplace', False)
    ordered_overide = params.get('ordered_overide', True)
    str_convert = params.get('str_convert', False)
    
    #header of the returned column
    ordl_column = column + '_ordl'
    
    if inplace is True:
      
      #source column is renamed after checking the new header for overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, ordl_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : ordl_column}, inplace = True)
      mdf_test.rename(columns = {column : ordl_column}, inplace = True)
      
    else:
      
      #source column is copied into the new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, ordl_column, suffixoverlap_results)
      
      mdf_test[ordl_column] = mdf_test[column].copy()
      
    #ordered is only activated when both train and test are ordered categoricals
    #in which case the category order is recorded prior to any dtype conversions
    ordered = False
    if ordered_overide \
    and mdf_train[ordl_column].dtype.name == 'category' \
    and mdf_train[ordl_column].cat.ordered:
      labels_train = list(mdf_train[ordl_column].cat.categories)
      if mdf_test[ordl_column].dtype.name == 'category' \
      and mdf_test[ordl_column].cat.ordered:
        labels_test = list(mdf_test[ordl_column].cat.categories)
        ordered = True
    
    #entry type applied after infill, str_convert casts entries to strings
    #otherwise the column is returned to object dtype
    entry_type = str if str_convert is True else 'object'
    
    for df in (mdf_train, mdf_test):
      
      #categorical dtype allows registering the plug value prior to infill
      df[ordl_column] = df[ordl_column].astype('category')
      if 'zzzinfill' not in df[ordl_column].cat.categories:
        df[ordl_column] = df[ordl_column].cat.add_categories(['zzzinfill'])
      
      #missing entries receive the plug value
      df[ordl_column] = df[ordl_column].fillna('zzzinfill')
      
      df[ordl_column] = df[ordl_column].astype(entry_type)
    
    #the recorded category order receives the same string conversion as the entries
    if str_convert is True and ordered is True:
      labels_train = [str(x) for x in labels_train]
      labels_test = [str(x) for x in labels_test]
    
    if ordered is False:
      
      #labels are the unique entries sorted by their string representation
      labels_train = sorted(list(mdf_train[ordl_column].unique()), key=str)
      labels_test = sorted(list(mdf_test[ordl_column].unique()), key=str)
    
    #plug value is appended to the labels when not already present
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
    
    #____
    #train labels that coincide with an integer from the encoding range
    #would interfere with the replace operations below
    #so they are mapped to a string with an arbitrary suffix not likely to be in the set
    encoding_range = range(len(labels_train))
    overlap_replace = \
    {label : str(label) + 'encoding_overlap' for label in labels_train if label in encoding_range}
    
    if len(overlap_replace) > 0:
      
      mdf_train[ordl_column] = mdf_train[ordl_column].replace(overlap_replace)
      mdf_test[ordl_column] = mdf_test[ordl_column].replace(overlap_replace)
      
      if ordered is True:
        
        #overlap entries are swapped in the label lists while retaining the order
        for foundoverlap, replacement in overlap_replace.items():
          labels_train = [replacement if x == foundoverlap else x for x in labels_train]
          labels_test = [replacement if x == foundoverlap else x for x in labels_test]
          
      else:
        
        #labels are extracted again from the updated columns
        labels_train = sorted(list(mdf_train[ordl_column].unique()), key=str)
        labels_test = sorted(list(mdf_test[ordl_column].unique()), key=str)
        
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
    
    #____
    
    #each train label is paired with its position in the label list
    #the positions are the ordinal points that replace the categories
    ordinal_dict = {label : encoding for encoding, label in enumerate(labels_train)}
    
    #dtype operation is to address edge case if object type drifted to numeric which impacts replace
    if mdf_train[ordl_column].dtype.name != 'object':
      mdf_train[ordl_column] = mdf_train[ordl_column].astype('object')
    
    #apply the ordinal transformation to the train set
    mdf_train[ordl_column] = mdf_train[ordl_column].replace(ordinal_dict)
    
    #test labels that weren't present in the train labels are mapped to the plug value
    testspecificcategories = list(set(labels_test) - set(labels_train))
    testplug_dict = dict.fromkeys(testspecificcategories, 'zzzinfill')
    
    if mdf_test[ordl_column].dtype.name != 'object':
      mdf_test[ordl_column] = mdf_test[ordl_column].astype('object')
    mdf_test[ordl_column] = mdf_test[ordl_column].replace(testplug_dict)
    
    #the dtype is inspected again since the preceding replace may have changed it
    #then the ordinal transformation is applied to the test set
    if mdf_test[ordl_column].dtype.name != 'object':
      mdf_test[ordl_column] = mdf_test[ordl_column].astype('object')
    mdf_test[ordl_column] = mdf_test[ordl_column].replace(ordinal_dict)
    
    #unsigned integer type is selected based on the number of encodings
    #to ensure these aren't saved as floats for memory considerations
    encoding_count = len(ordinal_dict)
    if encoding_count < 254:
      integer_type = np.uint8
    elif encoding_count < 65534:
      integer_type = np.uint16
    else:
      integer_type = np.uint32
      
    mdf_train[ordl_column] = mdf_train[ordl_column].astype(integer_type)
    mdf_test[ordl_column] = mdf_test[ordl_column].astype(integer_type)
    
    #driftreport metric ordl_activations_dict
    #records for each label the ratio of train set rows with that encoding
    train_rowcount = mdf_train[ordl_column].shape[0]
    ordl_activations_dict = \
    {label : (mdf_train[ordl_column] == encoding).sum() / train_rowcount \
     for label, encoding in ordinal_dict.items()}
    
    inverse_ordinal_dict = {encoding : label for label, encoding in ordinal_dict.items()}
    activations_list = list(inverse_ordinal_dict)
    
    categorylist = [ordl_column]
    
    column_dict_list = []
    
    for tc in categorylist:
      
      tc_normalization = {'ordinal_dict' : ordinal_dict,
                          'inverse_ordinal_dict' : inverse_ordinal_dict,
                          'activations_list' : activations_list,
                          'ordinal_overlap_replace' : overlap_replace,
                          'ordl_activations_dict' : ordl_activations_dict,
                          'ordered_overide' : ordered_overide,
                          'ordered' : ordered,
                          'str_convert' : str_convert}
      
      normalization_dict = {tc : tc_normalization}
      
      tc_properties = {'category' : 'ordl',
                       'origcategory' : category,
                       'normalization_dict' : normalization_dict,
                       'origcolumn' : column,
                       'inputcolumn' : column,
                       'columnslist' : categorylist,
                       'categorylist' : categorylist,
                       'infillmodel' : False,
                       'infillcomplete' : False,
                       'suffixoverlap_results' : suffixoverlap_results,
                       'deletecolumn' : False}
      
      column_dict = {tc : tc_properties}
      
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def _process_ord3(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ord3(mdf_train, mdf_test, column, category)
    #ordinal (sequential integer) encoding of a categoric column
    #the encoded column is returned with suffix appender '_ord3'
    #
    #integers are assigned to the train set entries sorted by frequency of occurance
    #from most to least frequent (so the most frequent entry is encoded as 0)
    #with entries of equal frequency sorted by their label
    #the exception is when ordered_overide is active and both the train and test
    #columns are received as pandas ordered categoricals,
    #in which case the order of the train set categories is the encoding basis
    #
    #missing entries are represented by the plug value 'zzzinfill'
    #test set entries that weren't found in the train set also receive this plug value
    #
    #accepts parameters
    #inplace: boolean defaulting to False, when True the source column is renamed instead of copied
    #ordered_overide: boolean defaulting to True, described above
    #str_convert: boolean defaulting to False, when True entries are converted to strings
    #so that numbers and their string equivalents share an encoding (eg 2 == '2'),
    #when False they are encoded seperately (eg 2 != '2')
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #parameters fall back to these defaults when not found in params
    inplace = params.get('inplace', False)
    ordered_overide = params.get('ordered_overide', True)
    str_convert = params.get('str_convert', False)
    
    #header of the returned column
    ord3_column = column + '_ord3'
    
    def _frequency_sorted_labels(series):
      #returns the unique entries of series as a list sorted by count descending
      #with ties sorted by label ascending
      #value_counts names its result after the source column in pandas < 2.0
      #and 'count' from pandas 2.0 onward, so the name is set explicitly here
      #to keep the sort_values column reference valid in both cases
      counts = pd.DataFrame(series.value_counts().rename(ord3_column))
      counts = counts.rename_axis('zzzinfill').sort_values(by = [ord3_column, 'zzzinfill'], ascending = [False, True])
      return list(counts.index)
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, ord3_column, suffixoverlap_results)

      mdf_test[ord3_column] = mdf_test[column].copy()
    
    else:
      
      #source column is renamed after checking the new header for overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, ord3_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : ord3_column}, inplace = True)
      mdf_test.rename(columns = {column : ord3_column}, inplace = True)
    
    #ordered is only activated when both train and test are ordered categoricals
    #in which case the category order is recorded prior to any dtype conversions
    ordered = False
    if ordered_overide \
    and mdf_train[ord3_column].dtype.name == 'category' \
    and mdf_train[ord3_column].cat.ordered:
      labels_train = list(mdf_train[ord3_column].cat.categories)
      if mdf_test[ord3_column].dtype.name == 'category' \
      and mdf_test[ord3_column].cat.ordered:
        labels_test = list(mdf_test[ord3_column].cat.categories)
        ordered = True
    
    #convert column to category if it isn't already
    mdf_train[ord3_column] = mdf_train[ord3_column].astype('category')
    mdf_test[ord3_column] = mdf_test[ord3_column].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[ord3_column].cat.categories:
      mdf_train[ord3_column] = mdf_train[ord3_column].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[ord3_column].cat.categories:
      mdf_test[ord3_column] = mdf_test[ord3_column].cat.add_categories(['zzzinfill'])

    #replace NA with the plug value
    mdf_train[ord3_column] = mdf_train[ord3_column].fillna('zzzinfill')
    mdf_test[ord3_column] = mdf_test[ord3_column].fillna('zzzinfill')
    
    if str_convert is True:
      #replace numerical with string equivalent (this operation changes dtype from category to object)
      mdf_train[ord3_column] = mdf_train[ord3_column].astype(str)
      mdf_test[ord3_column] = mdf_test[ord3_column].astype(str)
      if ordered is True:
        #the recorded category order receives the same string conversion as the entries
        labels_train = [str(x) for x in labels_train]
        labels_test = [str(x) for x in labels_test]
    else:
      mdf_train[ord3_column] = mdf_train[ord3_column].astype('object')
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
            
    if ordered is False:
      
      #extract categories for column labels
      #with train values sorted by frequency of occurance from most to least
      labels_train = _frequency_sorted_labels(mdf_train[ord3_column])
      
      labels_test = list(mdf_test[ord3_column].unique())

    #plug value is appended to the labels when not already present
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
    
    #____
    #train labels that coincide with an integer from the encoding range
    #would interfere with the replace operations below
    #so they are mapped to a string with an arbitrary suffix not likely to be in the set
    encoding_range = range(len(labels_train))
    overlap_replace = {}
    for value in labels_train:
      if value in encoding_range:
        overlap_replace.update({value : str(value) + 'encoding_overlap'})
    
    if len(overlap_replace) > 0:
      
      #replace encoding overlap entries in the returned columns
      mdf_train[ord3_column] = mdf_train[ord3_column].replace(overlap_replace)
      mdf_test[ord3_column] = mdf_test[ord3_column].replace(overlap_replace)
      
      if ordered is True:
        
        #overlap entries are swapped in the label lists while retaining the order
        for foundoverlap in overlap_replace:
          labels_train = [overlap_replace[foundoverlap] if x == foundoverlap else x for x in labels_train]
          labels_test = [overlap_replace[foundoverlap] if x == foundoverlap else x for x in labels_test]

      else:
        
        #labels are extracted again from the updated columns
        labels_train = _frequency_sorted_labels(mdf_train[ord3_column])

        labels_test = list(mdf_test[ord3_column].unique())
        
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
    
    #____
    
    #zip a dictionary from the label list and range of its length
    #the range values will be our ordinal points to replace the categories
    listlength = len(labels_train)
    ordinal_dict = dict(zip(labels_train, range(listlength)))
    
    #there is an edge case for replace operation if dtype drifted from object such as to numeric
    if mdf_train[ord3_column].dtype.name != 'object':
      mdf_train[ord3_column] = mdf_train[ord3_column].astype('object')
    
    #replace the categories in train set via ordinal transformation
    mdf_train[ord3_column] = mdf_train[ord3_column].replace(ordinal_dict)
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let's identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    if mdf_test[ord3_column].dtype.name != 'object':
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
    mdf_test[ord3_column] = mdf_test[ord3_column].replace(testplug_dict)
    
    #the dtype is inspected again since the preceding replace may have changed it
    #then the ordinal transformation is applied to the test set
    if mdf_test[ord3_column].dtype.name != 'object':
      mdf_test[ord3_column] = mdf_test[ord3_column].astype('object')
    mdf_test[ord3_column] = mdf_test[ord3_column].replace(ordinal_dict)
    
    #unsigned integer type is selected based on the number of encodings
    #to ensure these aren't saved as floats for memory considerations
    if len(ordinal_dict) < 254:
      integer_type = np.uint8
    elif len(ordinal_dict) < 65534:
      integer_type = np.uint16
    else:
      integer_type = np.uint32
      
    mdf_train[ord3_column] = mdf_train[ord3_column].astype(integer_type)
    mdf_test[ord3_column] = mdf_test[ord3_column].astype(integer_type)

    #driftreport metric ordl_activations_dict
    #records for each label the ratio of train set rows with that encoding
    ordl_activations_dict = {}
    for key in ordinal_dict:
      sumcalc = (mdf_train[ord3_column] == ordinal_dict[key]).sum() 
      ratio = sumcalc / mdf_train[ord3_column].shape[0]
      ordl_activations_dict.update({key:ratio})

    inverse_ordinal_dict = {value:key for key,value in ordinal_dict.items()}
    activations_list = list(inverse_ordinal_dict)
    
    categorylist = [ord3_column]  
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'ordinal_dict' : ordinal_dict, \
                                  'inverse_ordinal_dict' : inverse_ordinal_dict, \
                                  'activations_list' : activations_list, \
                                  'ordinal_overlap_replace' : overlap_replace, \
                                  'ordl_activations_dict' : ordl_activations_dict, \
                                  'ordered_overide' : ordered_overide, \
                                  'ordered' : ordered, \
                                  'str_convert' : str_convert}}
    
      column_dict = {tc : {'category' : 'ord3', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def _process_maxb(self, mdf_train, mdf_test, column, category, \
                   postprocess_dict, params = {}):
    '''
    #process_maxb
    #function to translate an integer encoded categoric set (such as returned from ord3)
    #to a reduced number of activations, returned with suffix appender '_maxb'
    #
    #accepts parameters, each defaulting to False which signals not applied
    #maxbincount: integer for the maximum activation to retain
    #eg if maxbincount is 4, 0-3 will be retained and remainder aggregated into 4
    #minentrycount: integer >= 1 for the minimum number of train set entries
    #the first activation with fewer entries and all activations above it are aggregated
    #minentryratio: float between 0-1 (exclusive), comparable to minentrycount
    #but evaluated against the ratio of train set rows in an activation
    #inplace: boolean defaulting to False, when True the source column is renamed instead of copied
    #
    #the thresholds are derived from the train set and applied to both train and test
    #
    #entries that are non-numeric or non-integer are subject to infill
    #which is the adjacent cell from preceding row (ffill), then from following row (bfill)
    #then 0 for the case where no valid entries were available
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #parameters fall back to these defaults when not found in params
    inplace = params.get('inplace', False)
    maxbincount = params.get('maxbincount', False)
    minentrycount = params.get('minentrycount', False)
    minentryratio = params.get('minentryratio', False)
    
    #header of the returned column
    maxb_column = column + '_maxb'
    
    def _consolidate(cap):
      #aggregates entries >= cap into cap for both the train and test columns
      for df in (mdf_train, mdf_test):
        df[maxb_column] = np.where(df[maxb_column] < cap, df[maxb_column], cap)
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, maxb_column, suffixoverlap_results)

      mdf_test[maxb_column] = mdf_test[column].copy()
    
    else:
      
      #source column is renamed after checking the new header for overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, maxb_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : maxb_column}, inplace = True)
      mdf_test.rename(columns = {column : maxb_column}, inplace = True)
    
    for df in (mdf_train, mdf_test):
      
      #convert all values to either numeric or NaN
      df[maxb_column] = pd.to_numeric(df[maxb_column], errors='coerce')
      
      #non integers are subject to infill
      df[maxb_column] = np.where(df[maxb_column] == df[maxb_column].round(), df[maxb_column], np.nan)
      
      #replace nan with value from adjacent cell in preceding row
      #(ffill / bfill methods are used in place of the fillna method parameter
      #which is deprecated in recent pandas)
      df[maxb_column] = df[maxb_column].ffill()
      
      #follow with a bfill in case the first row had a nan
      df[maxb_column] = df[maxb_column].bfill()
      
      #nan will only remain at this point if every entry was nan
      df[maxb_column] = df[maxb_column].fillna(0)
    
    #get maximum train set activation which for ord3 will be least frequent entry
    maxactivation = int(mdf_train[maxb_column].max())
    
    #first we'll inspect maxbincount
    bincount_maxactivation = maxactivation
    if maxbincount is not False:
      
      if maxactivation > maxbincount:
        
        bincount_maxactivation = maxbincount
        
        _consolidate(bincount_maxactivation)
    
    #then inspect minentrycount
    #note that the entry counts are evaluated after any maxbincount consolidation
    count_maxactivation = maxactivation
    if minentrycount is not False:
      
      if minentrycount >= 1:
        
        #the first activation with fewer train entries than minentrycount becomes the cap
        for i in range(maxactivation + 1):
          
          if int((mdf_train[maxb_column] == i).sum()) < minentrycount:
            
            count_maxactivation = i
            break
            
        if count_maxactivation < maxactivation:
          
          _consolidate(count_maxactivation)
      
    #then inspect minentryratio
    #note that the entry ratios are evaluated after any preceding consolidations
    ratio_maxactivation = maxactivation
    if minentryratio is not False:

      if minentryratio > 0. and minentryratio < 1.:

        train_row_count = mdf_train.shape[0]
        
        #the first activation with a smaller ratio of train rows than minentryratio becomes the cap
        for i in range(maxactivation + 1):

          if int((mdf_train[maxb_column] == i).sum()) / train_row_count < minentryratio:

            ratio_maxactivation = i
            break

        if ratio_maxactivation < maxactivation:

          _consolidate(ratio_maxactivation)

    #create list of columns
    nmbrcolumns = [maxb_column]

    #grab some driftreport metrics
    #the returned maximum activation is the smallest of the derived caps
    new_maxactivation = \
    min(maxactivation, bincount_maxactivation, count_maxactivation, ratio_maxactivation)
    
    #count of train entries in the consolidated activation
    consolidation_count = 0
    if new_maxactivation < maxactivation:
      consolidation_count = mdf_train[mdf_train[maxb_column] == new_maxactivation].shape[0]
      
    #set integer type based on encoding depth
    if new_maxactivation < 254:
      integer_type = np.uint8
    elif new_maxactivation < 65534:
      integer_type = np.uint16
    else:
      integer_type = np.uint32
      
    mdf_train[maxb_column] = mdf_train[maxb_column].astype(integer_type)
    mdf_test[maxb_column] = mdf_test[maxb_column].astype(integer_type)

    normalization_dict = {maxb_column : {'bincount_maxactivation' : bincount_maxactivation, \
                                         'count_maxactivation' : count_maxactivation, \
                                         'ratio_maxactivation' : ratio_maxactivation, \
                                         'new_maxactivation' : new_maxactivation, \
                                         'orig_maxactivation' : maxactivation, \
                                         'consolidation_count' : consolidation_count, \
                                         'maxbincount' : maxbincount, \
                                         'minentrycount' : minentrycount, \
                                         'minentryratio' : minentryratio}}
    
    #assemble the column_dict entries for the returned column
    column_dict_list = []

    for nc in nmbrcolumns:
      
      column_dict = { nc : {'category' : 'maxb', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list
  
  def _process_ucct(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_ucct(mdf_train, mdf_test, column, category)
    #preprocess column with categories into unique class count sets
    #normalized by total row count
    #e.g. for each class in train set, 
    #counts instances and divides by total train set row count
    #(so values will fall in range 0-1)
    #test sets recive comparable encoding
    '''
    
    suffixoverlap_results = {}
    
    #create new column for trasnformation
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, column + '_ucct', suffixoverlap_results)
    
    mdf_test[column + '_ucct'] = mdf_test[column].copy()
    
    #convert column to category
    mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].astype('category')
    mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[column + '_ucct'].cat.categories:
      mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[column + '_ucct'].cat.categories:
      mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].cat.add_categories(['zzzinfill'])

    #replace NA with a dummy variable
    mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].fillna('zzzinfill')
    mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].fillna('zzzinfill')

    #replace numerical with string equivalent
    mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].astype(str)
    mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].astype(str)
    
    #extract categories for column labels
    #with values sorted by frequency of occurance from most to least
    labels_train = pd.DataFrame(mdf_train[column + '_ucct'].value_counts())
    labels_train = labels_train.rename_axis('zzzinfill').sort_values(by = [column + '_ucct', 'zzzinfill'], ascending = [False, True])
    labels_train = list(labels_train.index)
    
#     labels_train = list(mdf_train[column + '_ordl'].unique())
#     labels_train.sort()
    labels_test = list(mdf_test[column + '_ucct'].unique())
    labels_test.sort()

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
#       labels_train.sort()
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
      labels_test.sort()
    
    listlength = len(labels_train)
    
    #____
    #quick check if there are any overlaps between binary encodings and prior unique values in the column
    #as would interfere with the replacement operation
    #(I know this is an outlier scenario, just trying to be thorough)
    
    overlap_list = []
    overlap_replace = {}
    for value in labels_train:
      if value in range(listlength):
        overlap_list.append(value)
        
        #here's what we'll replace with, the string suffix is arbitrary and intended as not likely to be in set
        overlap_replace.update({value : value + 'encoding_overlap'})
    
    #here we replace the overlaps with version with jibberish suffix
    if len(overlap_list) > 0:
      mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].replace(overlap_replace)
      mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].replace(overlap_replace)
      
      #then we'll redo the encodings
      
      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = pd.DataFrame(mdf_train[column + '_ucct'].value_counts())
      labels_train = labels_train.rename_axis('zzzinfill').sort_values(by = [column + '_ucct', 'zzzinfill'], ascending = [False, True])
      labels_train = list(labels_train.index)
      
#       labels_train = list(mdf_train[column + '_ord2'].unique())
#       labels_train.sort()
      labels_test = list(mdf_test[column + '_ucct'].unique())
      labels_test.sort()
      
    #clear up memory
    del overlap_list
    
    #____
    
    #assemble the ordinal_dict
    #with key of class and value of normalized unique class count
    ordinal_dict = {}
    rowcount = mdf_train.shape[0]
    
    for item in labels_train:
      item_count = mdf_train[mdf_train[column + '_ucct'] == item].shape[0]
      ordinal_dict.update({item: item_count / rowcount})
    
    #replace the cateogries in train set via ordinal trasnformation
    mdf_train[column + '_ucct'] = mdf_train[column + '_ucct'].replace(ordinal_dict)
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let'/s identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].replace(testplug_dict)
    
    #now we'll apply the ordinal transformation to the test set
    mdf_test[column + '_ucct'] = mdf_test[column + '_ucct'].replace(ordinal_dict)

    #new driftreport metric ordl_activations_dict
    ordl_activations_dict = {}
    for key in ordinal_dict:
      sumcalc = (mdf_train[column+'_ucct'] == ordinal_dict[key]).sum() 
      ratio = sumcalc / mdf_train[column+'_ucct'].shape[0]
      ordl_activations_dict.update({key:ratio})
    
    categorylist = [column + '_ucct']  
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'ordinal_dict' : ordinal_dict, \
                                  'ordinal_overlap_replace' : overlap_replace, \
                                  'ordl_activations_dict' : ordl_activations_dict}}
    
      column_dict = {tc : {'category' : 'ucct', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_1010(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_1010(mdf_train, mdf_test, column, category)
    #preprocess column with categories into binary encoded sets
    #corresponding to (sorted) categories of >2 values
    #adresses infill with new point which we arbitrarily set as 'zzzinfill'
    #intended to show up as last point in set alphabetically
    #for categories present in test set not present in train set uses this 'zzzinfill' category
    '''
    
    suffixoverlap_results = {}
      
    #str_convert provides consistent encodings between numbers and string equivalent, eg 2 == '2'
    if 'str_convert' in params:
      str_convert = params['str_convert']
    else:
      str_convert = False
    
    #create new column for trasnformation
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, column + '_1010', suffixoverlap_results)
    
    mdf_test[column + '_1010'] = mdf_test[column].copy()
    
    #convert column to category
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('category')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('category')

    #if set is categorical we'll need the plug value for missing values included
    if 'zzzinfill' not in mdf_train[column + '_1010'].cat.categories:
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].cat.add_categories(['zzzinfill'])
    if 'zzzinfill' not in mdf_test[column + '_1010'].cat.categories:
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].cat.add_categories(['zzzinfill'])

    #replace NA with a dummy variable
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].fillna('zzzinfill')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].fillna('zzzinfill')

    if str_convert is True:
      #replace numerical with string equivalent
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype(str)
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype(str)
    else:
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('object')
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    
    #extract categories for column labels
    #note that .unique() extracts the labels as a numpy array
    labels_train = list(mdf_train[column + '_1010'].unique())
#     labels_train.sort()
    labels_train = sorted(labels_train, key=str)
    labels_test = list(mdf_test[column + '_1010'].unique())
#     labels_test.sort()
    labels_test = sorted(labels_test, key=str)

    #if infill not present in train set, insert
    if 'zzzinfill' not in labels_train:
      labels_train = labels_train + ['zzzinfill']
      labels_train = sorted(labels_train, key=str)
#       labels_train.sort()
    if 'zzzinfill' not in labels_test:
      labels_test = labels_test + ['zzzinfill']
      labels_test = sorted(labels_test, key=str)
#       labels_test.sort()
    
    #get length of the list
    listlength = len(labels_train)
    
    #calculate number of columns we'll need
    #currently using numk;py since already imported, this could also be done with math library
    binary_column_count = int(np.ceil(np.log2(listlength)))
    
    #initialize dictionaryt to store encodings
    binary_encoding_dict = {}
    encoding_list = []
    
    for i in range(listlength):
      
      #this converts the integer i to binary encoding
      #where f is an f string for inserting the column coount into the string to designate length of encoding
      #0 is to pad out the encoding with 0's for the length
      #and b is telling it to convert to binary 
      #note this returns a string
      encoding = format(i, f"0{binary_column_count}b")
      
      if i < len(labels_train):

        #store the encoding in a dictionary
        binary_encoding_dict.update({labels_train[i] : encoding})

        #store the encoding in a list for checking in next step
        encoding_list.append(encoding)

    #____
    #quick check if there are any overlaps between binary encodings and prior unique values in the column
    #as would interfere with the replacement operation
    #(I know this is an outlier scenario, just trying to be thorough)
    
    overlap_list = []
    overlap_replace = {}
    for value in labels_train:
      if value in encoding_list:
        overlap_list.append(value)
        
        #since overlapreplace will add suffix to the category overlapped with a binary encoding
        #let's quickly check if that category plus suffix is already present in the set
        #if so we'll keep adding digits until a unique entry
        encoding_overlap_suffix = 'encoding_overlap'
        for i in range(111):
          j = random.randint(0,9)
          if value + encoding_overlap_suffix in labels_train:
            encoding_overlap_suffix += str(j)
          else:
            break
        
        #here's what we'll replace with, the string suffix is arbitrary and intended as not likely to be in set
        overlap_replace.update({value : value + encoding_overlap_suffix})

    #here we replace the overlaps with version with jibberish suffix
    if len(overlap_list) > 0:
      
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].replace(overlap_replace)
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(overlap_replace)
      
      #then we'll redo the encodings
      
      #extract categories for column labels
      #note that .unique() extracts the labels as a numpy array
      labels_train = list(mdf_train[column + '_1010'].unique())
      labels_train = sorted(labels_train, key=str)
#       labels_train.sort()
      
      labels_test = list(mdf_test[column + '_1010'].unique())
      labels_test = sorted(labels_test, key=str)
#       labels_test.sort()

      #if infill not present in train set, insert
      if 'zzzinfill' not in labels_train:
        labels_train = labels_train + ['zzzinfill']
        labels_train = sorted(labels_train, key=str)
  #       labels_train.sort()
      if 'zzzinfill' not in labels_test:
        labels_test = labels_test + ['zzzinfill']
        labels_test = sorted(labels_test, key=str)
  #       labels_test.sort()
      
      #initialize dictionaryt to store encodings
      binary_encoding_dict = {}
      encoding_list = []

      for i in range(listlength):

        #this converts the integer i to binary encoding
        #where f is an f string for inserting the column coount into the string to designate length of encoding
        #0 is to pad out the encoding with 0's for the length
        #and b is telling it to convert to binary 
        #note this returns a string
        encoding = format(i, f"0{binary_column_count}b")
        
        if i < len(labels_train):

          #store the encoding in a dictionary
          binary_encoding_dict.update({labels_train[i] : encoding})

          #store the encoding in a list for checking in next step
          encoding_list.append(encoding)

    #clear up memory
    del encoding_list
    del overlap_list
    
    #new driftreport metric _1010_activations_dict
    _1010_activations_dict = {}
    for key in binary_encoding_dict:
      sumcalc = (mdf_train[column+'_1010'] == key).sum() 
      ratio = sumcalc / mdf_train[column+'_1010'].shape[0]
      _1010_activations_dict.update({key:ratio})
    
    #____
    
    #replace the cateogries in train set via ordinal trasnformation
    
    if mdf_train[column + '_1010'].dtype.name != 'object':
      mdf_train[column + '_1010'] = mdf_train[column + '_1010'].astype('object')
    
    mdf_train[column + '_1010'] = mdf_train[column + '_1010'].replace(binary_encoding_dict)      
    
    #in test set, we'll need to strike any categories that weren't present in train
    #first let'/s identify what applies
    testspecificcategories = list(set(labels_test)-set(labels_train))
    
    #so we'll just replace those items with our plug value
    if mdf_test[column + '_1010'].dtype.name != 'object':
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    testplug_dict = dict(zip(testspecificcategories, ['zzzinfill'] * len(testspecificcategories)))
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(testplug_dict)    
    
    #now we'll apply the 1010 transformation to the test set
    if mdf_test[column + '_1010'].dtype.name != 'object':
      mdf_test[column + '_1010'] = mdf_test[column + '_1010'].astype('object')
    mdf_test[column + '_1010'] = mdf_test[column + '_1010'].replace(binary_encoding_dict)    

    #ok let's create a list of columns to store each entry of the binary encoding
    _1010_columnlist = []
    
    for i in range(binary_column_count):
      
      _1010_columnlist.append(column + '_1010_' + str(i))
      
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, _1010_columnlist, suffixoverlap_results)
      
    #now let's store the encoding
    i=0
    for _1010_column in _1010_columnlist:
      
      mdf_train[_1010_column] = mdf_train[column + '_1010'].str.slice(i,i+1).astype(np.int8)
      
      mdf_test[_1010_column] = mdf_test[column + '_1010'].str.slice(i,i+1).astype(np.int8)
      
      i+=1
  
    #now delete the support column
    del mdf_train[column + '_1010']
    del mdf_test[column + '_1010']
    
    #now store the column_dict entries
    
    categorylist = _1010_columnlist
        
    column_dict_list = []
    
    for tc in categorylist:
        
      normalization_dict = {tc : {'_1010_binary_encoding_dict' : binary_encoding_dict, \
                                  '_1010_overlap_replace' : overlap_replace, \
                                  '_1010_binary_column_count' : binary_column_count, \
                                  '_1010_activations_dict' : _1010_activations_dict, \
                                  'str_convert' : str_convert}}
    
      column_dict = {tc : {'category' : '1010', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : categorylist, \
                           'categorylist' : categorylist, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def _process_bshr(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing funciton depending on input format of datetime data 
    #that creates a boolean column indicating 1 for rows
    #corresponding to traditional business hours in source column
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'start' in params:
      start = params['start']
    else:
      start = 9
      
    if 'end' in params:
      end = params['end']
    else:
      end = 17
      
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, column+'_bshr', suffixoverlap_results)
    
    #convert improperly formatted values to datetime in new column
    df[column+'_bshr'] = pd.to_datetime(df[column], errors = 'coerce')
    
    #This is kind of hack for whole hour increments, if we were needing
    #to evlauate hour ranges between seperate days a different metod
    #would be required
    #For now we'll defer to Dollly Parton
    df[column+'_bshr'] = df[column+'_bshr'].dt.hour
    df[column+'_bshr'] = df[column+'_bshr'].between(start, end)
    
    #reduce memory footprint
    df[column+'_bshr'] = df[column+'_bshr'].astype(np.int8)
    
    #create list of columns
    datecolumns = [column + '_bshr']

    #grab some driftreport metrics
    activationratio = df[column + '_bshr'].sum() / df[column + '_bshr'].shape[0]

    #create normalization dictionary
    normalization_dict = {column + '_bshr' : {'activationratio' : activationratio}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'bshr', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def _process_wkdy(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing funciton depending on input format of datetime data 
    #that creates a boolean column indicating 1 for rows
    #corresponding to weekdays in source column
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    suffixoverlap_results = {}
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, column+'_wkdy', suffixoverlap_results)
    
    #convert improperly formatted values to datetime in new column
    df[column+'_wkdy'] = pd.to_datetime(df[column], errors = 'coerce')
    
    #This is kind of hack for whole hour increments, if we were needing
    #to evlauate hour ranges between seperate days a different metod
    #would be required
    #For now we'll defer to Dollly Parton
    df[column+'_wkdy'] = pd.DatetimeIndex(df[column+'_wkdy']).dayofweek
    
    df[column+'_wkdy'] = df[column+'_wkdy'].between(0,4)
    
    #reduce memory footprint
    df[column+'_wkdy'] = df[column+'_wkdy'].astype(np.int8)
    
    #create list of columns
    datecolumns = [column+'_wkdy']

    #grab some driftreport metrics
    activationratio = df[column + '_wkdy'].sum() / df[column + '_wkdy'].shape[0]

    #create normalization dictionary
    normalization_dict = {column + '_wkdy' : {'activationratio' : activationratio}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'wkdy', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'deletecolumn' : False, \
                           'suffixoverlap_results' : suffixoverlap_results}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def _process_hldy(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing funciton depending on input format of datetime data 
    #that creates a boolean column indicating 1 for rows
    #corresponding to US Federal Holidays in source column
    #note this is a "singleprocess" function since is applied to single dataframe
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'holiday_list' in params:
      holiday_list = params['holiday_list']
    else:
      holiday_list = []
    
    if len(holiday_list) > 0:
    
      #reformat holiday_list
      holiday_list = pd.to_datetime(pd.DataFrame(holiday_list)[0], errors = 'coerce')

      #reform holiday_list again
      timestamp_list = []

      for row in range(holiday_list.shape[0]):
        timestamp = pd.Timestamp(holiday_list[row])
        timestamp_list += [timestamp]
      timestamp_list
      
    else:
      timestamp_list = []
      
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, column+'_hldy', suffixoverlap_results)
    
    #convert improperly formatted values to datetime in new column
    df[column+'_hldy'] = pd.to_datetime(df[column], errors = 'coerce')
    
    df[column+'_hldy'] = df[column+'_hldy'].dt.date
    
    df[column+'_hldy'] = pd.to_datetime(df[column+'_hldy'], errors = 'coerce')
    
    #grab list of holidays from import
    holidays = USFederalHolidayCalendar().holidays().tolist()

    holidays += timestamp_list
    
    #activate boolean identifier for holidays
    df[column+'_hldy'] = df[column+'_hldy'].isin(holidays)

    #reduce memory footprint
    df[column+'_hldy'] = df[column+'_hldy'].astype(np.int8)
    
    #create list of columns
    datecolumns = [column + '_hldy']

    #grab some driftreport metrics
    activationratio = df[column + '_hldy'].sum() / df[column + '_hldy'].shape[0]

    #create normalization dictionary
    normalization_dict = {column + '_hldy' : {'activationratio' : activationratio}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'hldy', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list
  
  def _process_wkds(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing funciton depending on input format of datetime data 
    #that creates a categorical column 
    #corresponding to weekdays in source column
    #note this is a "singleprocess" function since is applied to single dataframe
    #defdault infill is eight days a week
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, column + '_wkds', suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, column + '_wkds', suffixoverlap_results)
      
      df.rename(columns = {column : column + '_wkds'}, inplace = True)
    
    #convert improperly formatted values to datetime in new column
    df[column+'_wkds'] = pd.to_datetime(df[column+'_wkds'], errors = 'coerce')
    
    #This is kind of hack for whole hour increments, if we were needing
    #to evlauate hour ranges between seperate days a different metod
    #would be required
    #For now we'll defer to Dollly Parton
    df[column+'_wkds'] = pd.DatetimeIndex(df[column+'_wkds']).dayofweek
    
#     df[column+'_wkdy'] = df[column+'_wkdy'].between(0,4)

    #we'll use convention for default infill of eight days a week
    df[column + '_wkds'] = df[column + '_wkds'].fillna(7)
    
    #reduce memory footprint
    df[column+'_wkds'] = df[column+'_wkds'].astype(np.int8)
    
    #create list of columns
    datecolumns = [column+'_wkds']

    #grab some driftreport metrics
    numberofrows = df[column + '_wkds'].shape[0]
    mon_ratio = df[df[column + '_wkds'] == 0].shape[0] / numberofrows
    tue_ratio = df[df[column + '_wkds'] == 1].shape[0] / numberofrows
    wed_ratio = df[df[column + '_wkds'] == 2].shape[0] / numberofrows
    thr_ratio = df[df[column + '_wkds'] == 3].shape[0] / numberofrows
    fri_ratio = df[df[column + '_wkds'] == 4].shape[0] / numberofrows
    sat_ratio = df[df[column + '_wkds'] == 5].shape[0] / numberofrows
    sun_ratio = df[df[column + '_wkds'] == 6].shape[0] / numberofrows
    infill_ratio = df[df[column + '_wkds'] == 7].shape[0] / numberofrows
  
  
    #create normalization dictionary
    normalization_dict = {column+'_wkds' : {'mon_ratio' : mon_ratio, \
                                            'tue_ratio' : tue_ratio, \
                                            'wed_ratio' : wed_ratio, \
                                            'thr_ratio' : thr_ratio, \
                                            'fri_ratio' : fri_ratio, \
                                            'sat_ratio' : sat_ratio, \
                                            'sun_ratio' : sun_ratio, \
                                            'infill_ratio' : infill_ratio}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'wkds', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list
  
  def _process_mnts(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing funciton depending on input format of datetime data 
    #that creates a categorical column 
    #corresponding to months in source column
    #note this is a "singleprocess" function since is applied to single dataframe
    #default infill is 0
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
      
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, column + '_mnts', suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, column + '_mnts', suffixoverlap_results)
      
      df.rename(columns = {column : column + '_mnts'}, inplace = True)
    
    #convert improperly formatted values to datetime in new column
    df[column+'_mnts'] = pd.to_datetime(df[column+'_mnts'], errors = 'coerce')
    
    #This is kind of hack for whole hour increments, if we were needing
    #to evlauate hour ranges between seperate days a different metod
    #would be required
    #For now we'll defer to Dollly Parton
    df[column+'_mnts'] = pd.DatetimeIndex(df[column+'_mnts']).month
    
#     df[column+'_wkdy'] = df[column+'_wkdy'].between(0,4)

    #we'll use convention for default infill of eight days a week
    #jan-dec is 1-12, 0 is default infill
    df[column + '_mnts'] = df[column + '_mnts'].fillna(0)
    
    #reduce memory footprint
    df[column+'_mnts'] = df[column+'_mnts'].astype(np.int8)
    
    #create list of columns
    datecolumns = [column+'_mnts']

    #grab some driftreport metrics
    numberofrows = df[column + '_mnts'].shape[0]
    infill_ratio = df[df[column + '_mnts'] == 0].shape[0] / numberofrows
    jan_ratio = df[df[column + '_mnts'] == 1].shape[0] / numberofrows
    feb_ratio = df[df[column + '_mnts'] == 2].shape[0] / numberofrows
    mar_ratio = df[df[column + '_mnts'] == 3].shape[0] / numberofrows
    apr_ratio = df[df[column + '_mnts'] == 4].shape[0] / numberofrows
    may_ratio = df[df[column + '_mnts'] == 5].shape[0] / numberofrows
    jun_ratio = df[df[column + '_mnts'] == 6].shape[0] / numberofrows
    jul_ratio = df[df[column + '_mnts'] == 7].shape[0] / numberofrows
    aug_ratio = df[df[column + '_mnts'] == 8].shape[0] / numberofrows
    sep_ratio = df[df[column + '_mnts'] == 9].shape[0] / numberofrows
    oct_ratio = df[df[column + '_mnts'] == 10].shape[0] / numberofrows
    nov_ratio = df[df[column + '_mnts'] == 11].shape[0] / numberofrows
    dec_ratio = df[df[column + '_mnts'] == 12].shape[0] / numberofrows
  
    #create normalization dictionary
    normalization_dict = {column+'_mnts' : {'infill_ratio' : infill_ratio, \
                                            'jan_ratio' : jan_ratio, \
                                            'feb_ratio' : feb_ratio, \
                                            'mar_ratio' : mar_ratio, \
                                            'apr_ratio' : apr_ratio, \
                                            'may_ratio' : may_ratio, \
                                            'jun_ratio' : jun_ratio, \
                                            'jul_ratio' : jul_ratio, \
                                            'aug_ratio' : aug_ratio, \
                                            'sep_ratio' : sep_ratio, \
                                            'oct_ratio' : oct_ratio, \
                                            'nov_ratio' : nov_ratio, \
                                            'dec_ratio' : dec_ratio}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for dc in datecolumns:

      column_dict = { dc : {'category' : 'mnts', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : datecolumns, \
                           'categorylist' : datecolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def _process_tmzn(self, df, column, category, postprocess_dict, params = {}):
    '''
    #processing function for datetime data that defaults as a passthrough
    #
    #accepts parameter 'timezone', which defaults to False for passthrough
    #when a timezone designation is received
    #(in a form accepted by Pandas tz_convert)
    #entries are parsed with pd.to_datetime normalized to UTC
    #(unparseable entries are coerced to missing)
    #and then converted to the designated time zone
    #such as may be useful when a set of timestamps includes entries from multiple time zones
    #
    #accepts parameter 'inplace', when True the source column is renamed
    #to column + '_tmzn' instead of being copied
    #
    #returns the dataframe and a column_dict_list with an entry for column + '_tmzn'
    '''

    #parameter defaults
    inplace = params.get('inplace', False)
    timezone = params.get('timezone', False)

    tmzn_column = column + '_tmzn'
    suffixoverlap_results = {}

    if inplace is True:

      #rename the source column after checking the new header for overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, tmzn_column, suffixoverlap_results)

      df.rename(columns = {column : tmzn_column}, inplace = True)

    else:

      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, tmzn_column, suffixoverlap_results)

    if timezone is not False:

      #parse to UTC first so that entries with mixed offsets share a common basis
      utc_times = pd.to_datetime(df[tmzn_column], errors = 'coerce', utc=True)

      df[tmzn_column] = utc_times.dt.tz_convert(timezone)

    #populate the data structures describing the returned column
    datecolumns = [tmzn_column]

    normalization_dict = {tmzn_column : {'timezone' : timezone}}

    column_dict_list = [
      {dc : {'category' : 'tmzn', \
             'origcategory' : category, \
             'normalization_dict' : normalization_dict, \
             'origcolumn' : column, \
             'inputcolumn' : column, \
             'columnslist' : datecolumns, \
             'categorylist' : datecolumns, \
             'infillmodel' : False, \
             'infillcomplete' : False, \
             'suffixoverlap_results' : suffixoverlap_results, \
             'deletecolumn' : False}}
      for dc in datecolumns
    ]

    return df, column_dict_list

  def _process_tmsc(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #time data segregated by time scale
    #with sin or cos applied to address periodicity
    #such as may be useful to return both by separate transformation categories
    #
    #accepts parameter 'scale' to distinguish between year/month/day/hour/minute/second
    #note that some scales can be returned combined by passing 
    #monthday/dayhourminute/hourminutesecond/minutesecond
    #(defaults to 'monthday')
    #accepts parameter 'suffix' for returned column header suffix (defaults to '_mdsn')
    #accepts parameter 'function' to distinguish between sin/cos (defaults to 'sin')
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #
    #entries are parsed with pd.to_datetime (unparseable entries are coerced to missing),
    #converted to radians based on the periodicity associated with the scale,
    #infilled from adjacent cells (with 0 as backup when there are no valid entries),
    #and then passed to the trigonometric function
    #
    #returns mdf_train, mdf_test, column_dict_list
    #when the train column does not have more than one unique value after infill
    #the new column is deleted from both sets and column_dict_list is returned empty
    #
    #note that adjacent cell infill is applied with Series.ffill / Series.bfill
    #since fillna(method=...) is deprecated in recent Pandas releases
    """
    
    suffixoverlap_results = {}
    
    #parameter defaults
    inplace = params.get('inplace', False)
    scale = params.get('scale', 'monthday')
    suffix = params.get('suffix', '_mdsn')
    function = params.get('function', 'sin')
    
    time_column = column + suffix
    
    #support columns, only populated for the monthday scale
    tempcolumn1 = time_column + '_tmp1'
    tempcolumn2 = time_column + '_tmp2'
    
    def _scale_to_radians(df):
      #replaces the datetime entries of df[time_column] with radians
      #based on the periodicity of the scale parameter
      #an unrecognized scale leaves the column unchanged
      dates = df[time_column]
      
      if scale == 'year':
        #we'll scale periodicity by decade
        df[time_column] = dates.dt.year * 2 * np.pi / 10
        
      elif scale == 'month':
        #we'll scale periodicity by year
        df[time_column] = dates.dt.month * 2 * np.pi / 12
        
      elif scale == 'day':
        #we'll scale periodicity by week
        df[time_column] = dates.dt.day * 2 * np.pi / 7
        
      elif scale == 'hour':
        #we'll scale periodicity by day
        df[time_column] = dates.dt.hour * 2 * np.pi / 24
        
      elif scale == 'minute':
        #we'll scale periodicity by hour
        df[time_column] = dates.dt.minute * 2 * np.pi / 60
        
      elif scale == 'second':
        #we'll scale periodicity by minute
        df[time_column] = dates.dt.second * 2 * np.pi / 60
        
      elif scale == 'monthday':
        #temp1 is for number of days in month, temp2 is to handle leap year support
        df[tempcolumn1] = dates.dt.month
        df[tempcolumn2] = np.where(dates.dt.is_leap_year, 29, 28)
        
        df[tempcolumn1] = \
        np.where(df[tempcolumn1].isin([1,3,5,7,8,10,12]), 31, df[tempcolumn1])
        
        df[tempcolumn1] = \
        np.where(df[tempcolumn1].isin([4,6,9,11]), 30, df[tempcolumn1])
        
        df[tempcolumn1] = \
        np.where(df[tempcolumn1].isin([2]), df[tempcolumn2], df[tempcolumn1])
        
        #combine month and day, scale for trigonometric transform, periodicity by year
        df[time_column] = (dates.dt.month + dates.dt.day / \
        df[tempcolumn1]) * 2 * np.pi / 12
        
        #delete the support columns
        del df[tempcolumn1]
        del df[tempcolumn2]
        
      elif scale == 'dayhourminute':
        #we'll scale periodicity by week
        df[time_column] = (dates.dt.day + dates.dt.hour / 24 + dates.dt.minute / 24 / 60) * 2 * np.pi / 7
        
      elif scale == 'hourminutesecond':
        #we'll scale periodicity by day
        df[time_column] = (dates.dt.hour + dates.dt.minute / 60 + dates.dt.second / 60 / 60) * 2 * np.pi / 24
        
      elif scale == 'minutesecond':
        #we'll scale periodicity by hour
        df[time_column] = (dates.dt.minute + dates.dt.second / 60) * 2 * np.pi / 60
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, time_column, suffixoverlap_results)

      mdf_test[time_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, time_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : time_column}, inplace = True)
      mdf_test.rename(columns = {column : time_column}, inplace = True)
    
    #apply pd.to_datetime to column, note that the errors = 'coerce' needed for messy data
    mdf_train[time_column] = pd.to_datetime(mdf_train[time_column], errors = 'coerce')
    mdf_test[time_column] = pd.to_datetime(mdf_test[time_column], errors = 'coerce')
    
    if scale == 'monthday':
      #the monthday scale makes use of temporary support columns
      #so their headers are checked for overlap with the train set
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, [tempcolumn1, tempcolumn2], suffixoverlap_results)
    
    #access time scale and convert to radians
    _scale_to_radians(mdf_train)
    _scale_to_radians(mdf_test)
    
    #grab a few drift metrics, we'll evaluate prior to infill and trigonometric transform
    timemean = mdf_train[time_column].mean()
    timemax = mdf_train[time_column].max()
    timemin = mdf_train[time_column].min()
    timestd = mdf_train[time_column].std()
    
    #default infill is adjacent cell
    #backup default infill of 0 is for cases without valid entries
    for df in (mdf_train, mdf_test):
      df[time_column] = df[time_column].ffill().bfill().fillna(0)
    
    #we'll only return a column if meaningful training data present
    #because it is not uncommon for time series to only contain single time scale recording
    if mdf_train[time_column].nunique() > 1:

      #apply trigonometric transform
      #(any other function value leaves the scaled entries as is)
      if function == 'sin':

        mdf_train[time_column] = np.sin(mdf_train[time_column])
        mdf_test[time_column] = np.sin(mdf_test[time_column])

      elif function == 'cos':

        mdf_train[time_column] = np.cos(mdf_train[time_column])
        mdf_test[time_column] = np.cos(mdf_test[time_column])

      #populate data structures
      column_dict_list = []
      categorylist = [time_column]

      for tc in categorylist:
        norm_dict = {tc : {'scale'         : scale, \
                           'suffix'        : suffix, \
                           'function'      : function, \
                           'timemean'      : timemean, \
                           'timemax'       : timemax, \
                           'timemin'       : timemin, \
                           'timestd'       : timestd}}

        column_dict = {tc : {'category' : 'tmsc', \
                             'origcategory' : category, \
                             'normalization_dict' : norm_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
        
    else:
      del mdf_train[time_column]
      del mdf_test[time_column]
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_time(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    """
    #normalized time data segregated by a particular time scale
    #
    #accepts parameter 'scale' to distinguish between year/month/day/hour/minute/second
    #(defaults to 'year')
    #accepts parameter 'suffix' for returned column header suffix (defaults to '_year')
    #accepts parameter 'normalization' to distinguish between zscore/minmax/unscaled
    #(defaults to 'zscore', expected to be one of those three values)
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #
    #entries are parsed with pd.to_datetime (unparseable entries are coerced to missing),
    #the time scale is extracted, missing entries are infilled from adjacent cells
    #(with 0 as backup when there are no valid entries),
    #and the result is scaled as (x - scaler) / divisor 
    #with scaler and divisor derived from the train set
    #
    #returns mdf_train, mdf_test, column_dict_list
    #when the train column does not have more than one unique value
    #the new column is deleted from both sets and column_dict_list is returned empty
    #
    #note that adjacent cell infill is applied with Series.ffill / Series.bfill
    #since fillna(method=...) is deprecated in recent Pandas releases
    """
    
    suffixoverlap_results = {}
    
    #parameter defaults
    inplace = params.get('inplace', False)
    scale = params.get('scale', 'year')
    suffix = params.get('suffix', '_year')
    normalization = params.get('normalization', 'zscore')
      
    time_column = column + suffix
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, time_column, suffixoverlap_results)

      mdf_test[time_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, time_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : time_column}, inplace = True)
      mdf_test.rename(columns = {column : time_column}, inplace = True)
    
    #apply pd.to_datetime to column, note that the errors = 'coerce' needed for messy data
    mdf_train[time_column] = pd.to_datetime(mdf_train[time_column], errors = 'coerce')
    mdf_test[time_column] = pd.to_datetime(mdf_test[time_column], errors = 'coerce')
    
    #access time scale from one of year/month/day/hour/minute/second
    #each of these is also the name of the corresponding .dt accessor attribute
    #(an unrecognized scale leaves the parsed column unchanged)
    if scale in ('year', 'month', 'day', 'hour', 'minute', 'second'):
      mdf_train[time_column] = getattr(mdf_train[time_column].dt, scale)
      mdf_test[time_column] = getattr(mdf_test[time_column].dt, scale)
      
    #default infill is adjacent cell
    mdf_train[time_column] = mdf_train[time_column].ffill().bfill()
    mdf_test[time_column] = mdf_test[time_column].ffill().bfill()
      
    #grab a few drift metrics, evaluated after the adjacent cell infill
    timemean = mdf_train[time_column].mean()
    timemax = mdf_train[time_column].max()
    timemin = mdf_train[time_column].min()
    timestd = mdf_train[time_column].std()

    maxminusmin = timemax - timemin
    
    #backup default infill for cases without valid entries
    mdf_train[time_column] = mdf_train[time_column].fillna(0)
    mdf_test[time_column] = mdf_test[time_column].fillna(0)
      
    #formula for scaling is (x - scaler) / divisor
    #normalization is either zscore/minmax/unscaled
    if normalization == 'zscore':
      scaler = timemean
      divisor = timestd
    elif normalization == 'minmax':
      scaler = timemin
      divisor = maxminusmin
    elif normalization == 'unscaled':
      scaler = 0
      divisor = 1
    
    #guard against division by zero or nan statistics (nan != nan)
    if divisor == 0 or divisor != divisor:
      divisor = 1
      
    if scaler != scaler:
      scaler = 0
      
    #apply normalization
    if normalization != 'unscaled':
      mdf_train[time_column] = (mdf_train[time_column] - scaler) / divisor
      mdf_test[time_column] = (mdf_test[time_column] - scaler) / divisor
      
    #we'll only return a column if meaningful training data present
    #because it is not uncommon for time series to only contain single time scale recording
    if mdf_train[time_column].nunique() > 1:
      
      #populate data structures
      column_dict_list = []
      categorylist = [time_column]

      for tc in categorylist:
        norm_dict = {tc : {'scale'         : scale, \
                           'suffix'        : suffix, \
                           'normalization' : normalization, \
                           'scaler'        : scaler, \
                           'divisor'       : divisor, \
                           'timemean'      : timemean, \
                           'timemax'       : timemax, \
                           'timemin'       : timemin, \
                           'timestd'       : timestd, \
                           'maxminusmin'   : maxminusmin}}

        column_dict = {tc : {'category' : 'time', \
                             'origcategory' : category, \
                             'normalization_dict' : norm_dict, \
                             'origcolumn' : column, \
                             'inputcolumn' : column, \
                             'columnslist' : categorylist, \
                             'categorylist' : categorylist, \
                             'infillmodel' : False, \
                             'infillcomplete' : False, \
                             'suffixoverlap_results' : suffixoverlap_results, \
                             'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
        
    else:
      del mdf_train[time_column]
      del mdf_test[time_column]
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list

  def _process_bxcx(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #Applies Box-Cox transform to a numerical set
    #by way of the support function _process_bxcx_support
    #
    #the support function is first applied to the train set 
    #with bxcx_lmbda = None and trnsfrm_mean = None so that they are derived from the train set
    #the bxcx_lmbda and trnsfrm_mean recorded in the returned normalization_dict
    #are then passed to the support function for the test set
    #so that both sets are transformed on a consistent basis
    #
    #returns mdf_train, mdf_test, 
    #and the column_dict_list returned from the train set application
    '''
    
    #train set application, which populates the normalization parameters
    mdf_train, column_dict_list = \
    self._process_bxcx_support(mdf_train, column, category, 1, \
                              bxcx_lmbda = None, trnsfrm_mean = None)

    #access the normalization_dict entry associated with the returned bxcx column
    bxcx_key = column + '_bxcx'
    for entry in column_dict_list:
      if bxcx_key in entry:
        bxcxnormalization_dict = entry[bxcx_key]['normalization_dict'][bxcx_key]

    train_lmbda = bxcxnormalization_dict['bxcx_lmbda']
    train_mean = bxcxnormalization_dict['trnsfrm_mean']

    #test set application based on train set parameters
    #(the column_dict_list returned for the test set is not used)
    mdf_test, _ = \
    self._process_bxcx_support(mdf_test, column, category, 1, \
                              bxcx_lmbda = train_lmbda, trnsfrm_mean = train_mean)

    return mdf_train, mdf_test, column_dict_list

  def _process_bxcx_support(self, df, column, category, bxcxerrorcorrect, bxcx_lmbda = None, trnsfrm_mean = None):
    '''                      
    #support function for applying a box-cox transformation to a numerical column
    #the transformed values are returned in a new column named column + '_bxcx'
    #
    #df is the dataframe, column is the header of the source column
    #category is recorded as origcategory in the returned column_dict
    #bxcxerrorcorrect is a multiplier applied to the transformed values
    #
    #bxcx_lmbda = None infers a suitable lambda value by way of SciPy's stats.boxcox
    #bxcx_lmbda = False returns the column as all zeros
    #otherwise the received lambda is applied in the transformation
    #
    #trnsfrm_mean = None applies the mean of the positive entries as infill
    #otherwise the received value is applied as infill
    #(non-numeric and non-positive entries are treated as missing)
    #
    #edge cases: when there is no valid positive mean, 
    #or when lambda is being inferred and the data is constant,
    #the column is returned as all zeros with lambda recorded as False
    #where the convention is that if training data is set to 0 then so will all subsequent data
    #note that constant data is not an edge case when a lambda is received,
    #so for example a single row test set is transformed consistent with the train set
    #
    #similarly when the transformed values overflow the 32 bit integer range
    #the column is returned as all zeros with bxcxerrorcorrect recorded as 0 
    #and lambda recorded as False
    #
    #Returns the transformed dataframe and a column_dict_list, 
    #where the normalization_dict includes trnsfrm_mean, bxcx_lmbda, bxcxerrorcorrect, mean
    '''
    
    suffixoverlap_results = {}
    
    bxcxcolumn = column + '_bxcx'

    df, suffixoverlap_results = \
    self._df_copy_train(df, column, bxcxcolumn, suffixoverlap_results)

    #convert all values to either numeric or NaN
    df[bxcxcolumn] = pd.to_numeric(df[bxcxcolumn], errors='coerce')
    
    #convert non-positive values to nan
    #where returns a new series, so integer columns are upcast to float
    #instead of setting nan in place to an incompatible dtype
    df[bxcxcolumn] = df[bxcxcolumn].where(df[bxcxcolumn] > 0)

    #get the mean value to apply to infill
    if trnsfrm_mean is None:
      #get mean of training data
      mean = df[bxcxcolumn].mean()  

    else:
      mean = trnsfrm_mean

    #edge case for no valid positive entries (nan != nan)
    if mean != mean or mean <= 0:
      mean = 0
      bxcx_lmbda = False

    #replace missing data with training set mean
    df[bxcxcolumn] = df[bxcxcolumn].fillna(mean)
    
    #apply box-cox transformation
    #note the stats.boxcox returns are different based on whether we passed a lmbda value
    
    if bxcx_lmbda is False:
      
      df[bxcxcolumn] = 0
      
    elif bxcx_lmbda is None:
      
      if df[bxcxcolumn].nunique() == 1:
        
        #edge case to avoid stats.boxcox error when inferring lambda from constant data
        df[bxcxcolumn] = 0

        #we'll use convention that if training data is set to 0 then so will all subsequent data
        bxcx_lmbda = False
        
      else:

        df[bxcxcolumn], bxcx_lmbda = stats.boxcox(df[bxcxcolumn])
        df[bxcxcolumn] *= bxcxerrorcorrect

    else:

      df[bxcxcolumn] = stats.boxcox(df[bxcxcolumn], lmbda = bxcx_lmbda)
      df[bxcxcolumn] *= bxcxerrorcorrect

    #this is to address an error when bxcx transform produces overflow
    #Series.max is used since the built-in max raises an error for an empty column
    bxcxerrorcorrect = 1
    if df[bxcxcolumn].max() > (2 ** 31 - 1):
      bxcxerrorcorrect = 0
      df[bxcxcolumn] = 0
      #consistent with the convention for zeroed training data noted above
      #so that subsequent data processed with this lambda is also set to 0
      bxcx_lmbda = False
      print("overflow condition found in boxcox transofrm, column set to 0: ", bxcxcolumn)

    #output of a list of the created column names
    nmbrcolumns = [bxcxcolumn]

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      #save a dictionary of the associated normalization parameters
      #(mean is recorded under both the 'trnsfrm_mean' and 'mean' keys)

      normalization_dict = {nc : {'trnsfrm_mean' : mean, \
                                  'bxcx_lmbda' : bxcx_lmbda, \
                                  'bxcxerrorcorrect' : bxcxerrorcorrect, \
                                  'mean' : mean}}

      column_dict = { nc : {'category' : 'bxcx', \
                           'origcategory' : category, \
                           'normalization_dict' : normalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return df, column_dict_list

  def _process_log0(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_log0(mdf_train, mdf_test, column, category)
    #function to apply logarithmic transform
    #takes as argument pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #applies a logarithmic transform (base 10)
    #replaces zeros, negative, and missing or improperly formatted data 
    #with the post-log mean of the train set as default infill
    #(or 0 when the train set has no valid entries)
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns same dataframes with new column of name column + '_log0'
    #and a column_dict_list with the train set post-log mean recorded as 'meanlog'
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    log_column = column + '_log0'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, log_column, suffixoverlap_results)

      mdf_test[log_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, log_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : log_column}, inplace = True)
      mdf_test.rename(columns = {column : log_column}, inplace = True)

    for df in (mdf_train, mdf_test):
      
      #convert all values to either numeric or NaN
      numeric = pd.to_numeric(df[log_column], errors='coerce')
      
      #replace all non-positive with nan for the log operation, then log transform
      #where returns a new series, so integer columns are upcast to float
      #instead of setting nan in place to an incompatible dtype
      df[log_column] = np.log10(numeric.where(numeric > 0))
    
    #get mean of train set
    meanlog = mdf_train[log_column].mean()
    
    #edge case for train set without valid entries (nan != nan)
    if meanlog != meanlog:
      meanlog = 0

    #replace missing data with training set mean
    mdf_train[log_column] = mdf_train[log_column].fillna(meanlog)
    mdf_test[log_column] = mdf_test[log_column].fillna(meanlog)

    #create list of columns
    nmbrcolumns = [log_column]

    nmbrnormalization_dict = {log_column : {'meanlog' : meanlog}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'log0', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list
  
  def _process_logn(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_logn(mdf_train, mdf_test, column, category)
    #function to apply natural logarithmic transform
    #takes as argument pandas dataframe of training and test data (mdf_train), (mdf_test)\
    #and the name of the column string ('column') and parent category (category)
    #applies a logarithmic transform (base e)
    #replaces zeros, negative, and missing or improperly formatted data 
    #with the post-log mean of the train set as default infill
    #(or 0 when the train set has no valid entries)
    #accepts parameter 'inplace', when True the source column is renamed instead of copied
    #returns same dataframes with new column of name column + '_logn'
    #and a column_dict_list with the train set post-log mean recorded as 'meanlog'
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    log_column = column + '_logn'
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, log_column, suffixoverlap_results)

      mdf_test[log_column] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, log_column, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : log_column}, inplace = True)
      mdf_test.rename(columns = {column : log_column}, inplace = True)

    for df in (mdf_train, mdf_test):
      
      #convert all values to either numeric or NaN
      numeric = pd.to_numeric(df[log_column], errors='coerce')
      
      #replace all non-positive with nan for the log operation, then log transform
      #where returns a new series, so integer columns are upcast to float
      #instead of setting nan in place to an incompatible dtype
      df[log_column] = np.log(numeric.where(numeric > 0))
    
    #get mean of train set
    meanlog = mdf_train[log_column].mean()
    
    #edge case for train set without valid entries (nan != nan)
    if meanlog != meanlog:
      meanlog = 0

    #replace missing data with training set mean
    mdf_train[log_column] = mdf_train[log_column].fillna(meanlog)
    mdf_test[log_column] = mdf_test[log_column].fillna(meanlog)

    #create list of columns
    nmbrcolumns = [log_column]

    nmbrnormalization_dict = {log_column : {'meanlog' : meanlog}}

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'logn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_sqrt(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sqrt(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #square root transform of a numeric column, result stored under column + '_sqrt'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric and negative entries are treated as missing
    #missing entries are filled with the train set mean of the square rooted values
    #(or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    sqrtcolumn = column + '_sqrt'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, sqrtcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : sqrtcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, sqrtcolumn, suffixoverlap_results)
      
      mdf_test[sqrtcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[sqrtcolumn] = pd.to_numeric(df[sqrtcolumn], errors='coerce')
    
    #negative entries have no real square root, mark them as missing
    for df in (mdf_train, mdf_test):
      df.loc[df[sqrtcolumn] < 0, sqrtcolumn] = np.nan
    
    #square root transform
    for df in (mdf_train, mdf_test):
      df[sqrtcolumn] = np.sqrt(df[sqrtcolumn])
    
    #infill value is derived from the train set only
    meansqrt = mdf_train[sqrtcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if meansqrt != meansqrt:
      meansqrt = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[sqrtcolumn] = df[sqrtcolumn].fillna(meansqrt)
    
    #headers of the returned columns
    nmbrcolumns = [sqrtcolumn]
    
    nmbrnormalization_dict = {sqrtcolumn : {'meansqrt' : meansqrt}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'sqrt',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_addd(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_addd(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #addition transform of a numeric column, result stored under column + '_addd'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'add' for the amount added, defaults to 1
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric are filled with the train set mean
    #taken after the addition (or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    adddcolumn = column + '_addd'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    add = params['add'] if 'add' in params else 1
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, adddcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : adddcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, adddcolumn, suffixoverlap_results)
      
      mdf_test[adddcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[adddcolumn] = pd.to_numeric(df[adddcolumn], errors='coerce')
    
    #addition transform
    for df in (mdf_train, mdf_test):
      df[adddcolumn] = df[adddcolumn] + add
    
    #infill value is derived from the train set only
    mean = mdf_train[adddcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[adddcolumn] = df[adddcolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [adddcolumn]
    
    nmbrnormalization_dict = {adddcolumn : {'mean' : mean,
                                            'add' : add}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'addd',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_sbtr(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_sbtr(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #subtraction transform of a numeric column, result stored under column + '_sbtr'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'subtract' for the amount subtracted, defaults to 1
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric are filled with the train set mean
    #taken after the subtraction (or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    sbtrcolumn = column + '_sbtr'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    subtract = params['subtract'] if 'subtract' in params else 1
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, sbtrcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : sbtrcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, sbtrcolumn, suffixoverlap_results)
      
      mdf_test[sbtrcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[sbtrcolumn] = pd.to_numeric(df[sbtrcolumn], errors='coerce')
    
    #subtraction transform
    for df in (mdf_train, mdf_test):
      df[sbtrcolumn] = df[sbtrcolumn] - subtract
    
    #infill value is derived from the train set only
    mean = mdf_train[sbtrcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[sbtrcolumn] = df[sbtrcolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [sbtrcolumn]
    
    nmbrnormalization_dict = {sbtrcolumn : {'mean' : mean,
                                            'subtract' : subtract}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'sbtr',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_mltp(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_mltp(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #multiplication transform of a numeric column, result stored under column + '_mltp'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'multiply' for the multiplier, defaults to 2
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric are filled with the train set mean
    #taken after the multiplication (or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    mltpcolumn = column + '_mltp'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    multiply = params['multiply'] if 'multiply' in params else 2
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, mltpcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : mltpcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, mltpcolumn, suffixoverlap_results)
      
      mdf_test[mltpcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[mltpcolumn] = pd.to_numeric(df[mltpcolumn], errors='coerce')
    
    #multiplication transform
    for df in (mdf_train, mdf_test):
      df[mltpcolumn] = df[mltpcolumn] * multiply
    
    #infill value is derived from the train set only
    mean = mdf_train[mltpcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[mltpcolumn] = df[mltpcolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [mltpcolumn]
    
    nmbrnormalization_dict = {mltpcolumn : {'mean' : mean,
                                            'multiply' : multiply}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'mltp',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_divd(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_divd(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #division transform of a numeric column, result stored under column + '_divd'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'divide' for the divisor, defaults to 2
    #a divisor of 0 is overridden to 1, and the overridden value is what gets
    #recorded in the normalization dictionary
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric are filled with the train set mean
    #taken after the division (or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    divdcolumn = column + '_divd'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    divide = params['divide'] if 'divide' in params else 2
    
    #division by zero is avoided by falling back to the identity divisor
    if divide == 0:
      divide = 1
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, divdcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : divdcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, divdcolumn, suffixoverlap_results)
      
      mdf_test[divdcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[divdcolumn] = pd.to_numeric(df[divdcolumn], errors='coerce')
    
    #division transform
    for df in (mdf_train, mdf_test):
      df[divdcolumn] = df[divdcolumn] / divide
    
    #infill value is derived from the train set only
    mean = mdf_train[divdcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[divdcolumn] = df[divdcolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [divdcolumn]
    
    nmbrnormalization_dict = {divdcolumn : {'mean' : mean,
                                            'divide' : divide}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'divd',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_rais(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_rais(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #raise to a power transform of a numeric column, result stored under column + '_rais'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #accepts parameter 'raiser' for the exponent, defaults to 2 (square)
    #accepts parameter 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that are NaN after the transform (such as those that can't be parsed
    #as numeric) are filled with the train set mean taken after the transform
    #(or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    raiscolumn = column + '_rais'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    raiser = params['raiser'] if 'raiser' in params else 2
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, raiscolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : raiscolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, raiscolumn, suffixoverlap_results)
      
      mdf_test[raiscolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[raiscolumn] = pd.to_numeric(df[raiscolumn], errors='coerce')
    
    #power transform
    for df in (mdf_train, mdf_test):
      df[raiscolumn] = df[raiscolumn] ** raiser
    
    #infill value is derived from the train set only
    mean = mdf_train[raiscolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[raiscolumn] = df[raiscolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [raiscolumn]
    
    nmbrnormalization_dict = {raiscolumn : {'mean' : mean,
                                            'raiser' : raiser}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'rais',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_absl(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #process_absl(mdf_train, mdf_test, column, category, postprocess_dict, params)
    #absolute value transform of a numeric column, result stored under column + '_absl'
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column, category is recorded as 'origcategory' in the returned entry
    #postprocess_dict is accepted for signature consistency and is not accessed here
    #the only parameter read is 'inplace', when True the source column is renamed
    #to the new header instead of being copied
    #entries that can't be parsed as numeric are filled with the train set mean
    #taken after the transform (or with 0 when that mean is itself NaN)
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    abslcolumn = column + '_absl'
    
    suffixoverlap_results = {}
    
    inplace = params['inplace'] if 'inplace' in params else False
    
    if inplace is True:
      
      #check the new header against mdf_train, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, abslcolumn, suffixoverlap_results)
      
      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : abslcolumn}, inplace = True)
    
    else:
      
      #new column starts out as a copy of the source column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, abslcolumn, suffixoverlap_results)
      
      mdf_test[abslcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[abslcolumn] = pd.to_numeric(df[abslcolumn], errors='coerce')
    
    #absolute value transform
    for df in (mdf_train, mdf_test):
      df[abslcolumn] = df[abslcolumn].abs()
    
    #infill value is derived from the train set only
    mean = mdf_train[abslcolumn].mean()
    
    #NaN is the only value unequal to itself, in which case infill falls back to 0
    if mean != mean:
      mean = 0
    
    #same infill value is applied to both sets
    for df in (mdf_train, mdf_test):
      df[abslcolumn] = df[abslcolumn].fillna(mean)
    
    #headers of the returned columns
    nmbrcolumns = [abslcolumn]
    
    nmbrnormalization_dict = {abslcolumn : {'mean' : mean}}
    
    #one column_dict entry per returned column
    column_dict_list = [
      {nc : {'category' : 'absl',
             'origcategory' : category,
             'normalization_dict' : nmbrnormalization_dict,
             'origcolumn' : column,
             'inputcolumn' : column,
             'columnslist' : nmbrcolumns,
             'categorylist' : nmbrcolumns,
             'infillmodel' : False,
             'infillcomplete' : False,
             'suffixoverlap_results' : suffixoverlap_results,
             'deletecolumn' : False}}
      for nc in nmbrcolumns
    ]
    
    return mdf_train, mdf_test, column_dict_list

  def _process_pwrs(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating bins corresponding to powers
    #of ten in one hot encoded columns
    
    #pwrs is intended for a raw set that is not yet normalized
    
    #positive values are encoded under column + '_10^#' where # is the power of 10,
    #i.e. floor(log10(value))
    #0, entries that can't be parsed as numeric, and infinite entries are
    #considered infill with no activations
    #negative values are infill by default, when the boolean 'negvalues' parameter
    #is True they are encoded under column + '_-10^#' based on their absolute value
    
    #bins found in the test set that were not found in the train set get no activations
    #if all train values are infill no columns are returned
    
    #mdf_train / mdf_test are the train and test dataframes, column is the header
    #of the source column (which is left in place), category is recorded as
    #'origcategory' in the returned entries
    #postprocess_dict is accepted for signature consistency and is not accessed here
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    negvalues = params['negvalues'] if 'negvalues' in params else False
    
    #headers of two temporary support columns which are deleted before returning
    #tempcolumn tracks the positive values, negtempcolumn the negative values
    tempcolumn = column + '_-10^'
    negtempcolumn = column + '_negtemp'
    
    def _floor_log10(df, target):
      #converts df[target], which holds strictly positive entries or NaN,
      #to the power of ten of each entry
      #infinite entries have no finite power of ten, and int(inf) would raise
      #OverflowError when the bin labels are derived, so they are treated as infill
      #NOTE(review): the corresponding postprocess function is not visible here,
      #confirm it treats infinite entries the same way
      finite = np.where(np.isfinite(df[target]), df[target], np.nan)
      #NaN entries propagate through log10 / floor so no masking is needed
      #(a comparison of the form x != np.nan is always True and can't serve as a mask)
      df[target] = np.floor(np.log10(finite))
    
    def _label_maps(train_series, test_series, prefix, activate):
      #returns replacement dictionaries (train, test) translating each power found
      #in a support column to the header of its bin, or to NaN for infill
      #activate False translates every entry to NaN
      def _label(unique):
        if unique == unique and activate:
          return prefix + str(int(unique))
        return np.nan
      
      train_map = {unique : _label(unique) for unique in train_series.unique()}
      
      #only bins that were found in the train set are valid for the test set
      train_labels = set(label for label in train_map.values() if label == label)
      
      test_map = {}
      for unique in test_series.unique():
        label = _label(unique)
        test_map[unique] = label if label in train_labels else np.nan
      
      return train_map, test_map
    
    #support column for positive values starts out as a copy of the source column
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, tempcolumn, suffixoverlap_results)
    
    mdf_test[tempcolumn] = mdf_test[column].copy()
    
    #entries that can't be parsed as numeric become NaN
    for df in (mdf_train, mdf_test):
      df[tempcolumn] = pd.to_numeric(df[tempcolumn], errors='coerce')
    
    #support column for negative values is copied from the numeric version
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, tempcolumn, negtempcolumn, suffixoverlap_results)
    
    mdf_test[negtempcolumn] = mdf_test[tempcolumn].copy()
    
    for df in (mdf_train, mdf_test):
      
      #negative support column only retains values < 0, as their absolute value
      df[negtempcolumn] = np.where(df[negtempcolumn] >= 0, np.nan, df[negtempcolumn])
      df[negtempcolumn] = df[negtempcolumn].abs()
      
      #positive support column only retains values > 0
      df[tempcolumn] = np.where(df[tempcolumn] <= 0, np.nan, df[tempcolumn])
    
    #translate the negative values to bin headers
    #(all NaN unless negvalues is activated, this is the difference between pwr2 and pwrs)
    for df in (mdf_train, mdf_test):
      _floor_log10(df, negtempcolumn)
    
    train_neg_dict, test_neg_dict = \
    _label_maps(mdf_train[negtempcolumn], mdf_test[negtempcolumn], column + '_-10^', negvalues)
    
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].replace(train_neg_dict)
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].replace(test_neg_dict)
    
    #translate the positive values to bin headers
    for df in (mdf_train, mdf_test):
      _floor_log10(df, tempcolumn)
    
    train_pos_dict, test_pos_dict = \
    _label_maps(mdf_train[tempcolumn], mdf_test[tempcolumn], column + '_10^', True)
    
    mdf_train[tempcolumn] = mdf_train[tempcolumn].replace(train_pos_dict)
    mdf_test[tempcolumn] = mdf_test[tempcolumn].replace(test_pos_dict)
    
    #combine the two support columns, a negative bin header takes precedence
    #(entries equal to themselves are the non-NaN entries)
    for df in (mdf_train, mdf_test):
      df[tempcolumn] = df[negtempcolumn].where(df[negtempcolumn] == df[negtempcolumn], df[tempcolumn])
    
    #one hot encode the bin headers, NaN entries receive no activations
    df_train_cat = pd.get_dummies(mdf_train[tempcolumn])
    df_test_cat = pd.get_dummies(mdf_test[tempcolumn])
    
    #bins of the train set that are absent from the test set get an all 0 column
    missing_cols = set( df_train_cat.columns ) - set( df_test_cat.columns )
    
    for c in missing_cols:
      df_test_cat[c] = 0
    
    #align test columns to the order of the train columns
    #(test columns were already restricted to train bins by the replacement dictionaries)
    df_test_cat = df_test_cat[df_train_cat.columns]
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, list(df_train_cat), suffixoverlap_results)
    
    #attach the one hot encoded columns
    mdf_train = pd.concat([mdf_train, df_train_cat], axis=1)
    mdf_test = pd.concat([mdf_test, df_test_cat], axis=1)
    
    #support columns are no longer needed
    for df in (mdf_train, mdf_test):
      del df[negtempcolumn]
      del df[tempcolumn]
    
    #headers of the returned columns
    #this one list is shared by 'labels_train', 'columnslist' and 'categorylist' below
    powercolumns = list(df_train_cat)
    
    #change data type for memory savings
    for powercolumn in powercolumns:
      mdf_train[powercolumn] = mdf_train[powercolumn].astype(np.int8)
      mdf_test[powercolumn] = mdf_test[powercolumn].astype(np.int8)
    
    powercolumns.sort()
    
    #keys and values are both the sorted headers
    powerlabelsdict = dict(zip(powercolumns, powercolumns))
    
    #one column_dict entry per returned column
    column_dict_list = []
    
    for pc in powercolumns:
      
      #ratio of train set activations in this column, collected for driftreport
      tc_ratio = pc + '_ratio'
      tcratio = mdf_train[pc].sum() / mdf_train[pc].shape[0]
      
      powernormalization_dict = {pc : {'powerlabelsdict_pwrs' : powerlabelsdict, \
                                       'labels_train' : powercolumns, \
                                       'missing_cols' : missing_cols, \
                                       'negvalues' : negvalues, \
                                       tc_ratio : tcratio}}
      
      column_dict_list.append({pc : {'category' : 'pwrs', \
                                     'origcategory' : category, \
                                     'normalization_dict' : powernormalization_dict, \
                                     'origcolumn' : column, \
                                     'inputcolumn' : column, \
                                     'columnslist' : powercolumns, \
                                     'categorylist' : powercolumns, \
                                     'infillmodel' : False, \
                                     'infillcomplete' : False, \
                                     'suffixoverlap_results' : suffixoverlap_results, \
                                     'deletecolumn' : False}})
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_pwor(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by binning entries according to their power of ten,
    #i.e. floor(log10(abs(entry))), returned in a single ordinal encoded column
    #with suffix appender '_pwor'
    
    #pwor is intended for a raw set that is not yet normalized
    
    #accepts parameters:
    #'inplace' boolean defaulting to False, when True the source column is 
    #renamed to the returned header instead of being copied
    #'negvalues' boolean defaulting to False, when True negative entries receive
    #their own bins (labeled column + '_-10^#'), when False they are treated as infill
    
    #infill has 0, which covers non-numeric entries, NaN, zero, and test set
    #bins that were not found in the train set
    #other integer designations are based on the order of bins found in the train set
    
    #returns mdf_train, mdf_test, and column_dict_list (one entry for the returned column)
    '''
    
    suffixoverlap_results = {}
    
    #params is only read here, never mutated
    inplace = params.get('inplace', False)
    negvalues = params.get('negvalues', False)
    
    pworcolumn = column + '_pwor'
    
    #temporary support column holding the negative entries, deleted before return
    negtempcolumn = column + '_negtempcolumn'
    
    def _neg_label(power):
      #NaN has no bin, and negative bins are only kept when negvalues is activated
      if power != power or not negvalues:
        return np.nan
      return column + '_-10^' + str(int(power))
    
    def _pos_label(power):
      #NaN has no bin
      if power != power:
        return np.nan
      return column + '_10^' + str(int(power))
    
    if inplace is not True:
      
      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, pworcolumn, suffixoverlap_results)
      
      mdf_test[pworcolumn] = mdf_test[column].copy()
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, pworcolumn, suffixoverlap_results)
      
      mdf_train.rename(columns = {column : pworcolumn}, inplace = True)
      mdf_test.rename(columns = {column : pworcolumn}, inplace = True)
    
    #convert all values to either numeric or NaN
    for df in (mdf_train, mdf_test):
      df[pworcolumn] = pd.to_numeric(df[pworcolumn], errors='coerce')
    
    #copy set for negative values
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, pworcolumn, negtempcolumn, suffixoverlap_results)
    
    mdf_test[negtempcolumn] = mdf_test[pworcolumn].copy()
    
    #support column: convert all values >= 0 to NaN
    for df in (mdf_train, mdf_test):
      df[negtempcolumn] = np.where(df[negtempcolumn] >= 0, np.nan, df[negtempcolumn])
    
    #support column: take abs value so that log10 is defined
    for df in (mdf_train, mdf_test):
      df[negtempcolumn] = df[negtempcolumn].abs()
    
    #target column: convert all values <= 0 to NaN
    for df in (mdf_train, mdf_test):
      df[pworcolumn] = np.where(df[pworcolumn] <= 0, np.nan, df[pworcolumn])
    
    #power of ten of each entry
    #NaN propagates through log10 and floor so no masking is needed
    #(a comparison against np.nan is always True and so can't serve as a mask)
    for df in (mdf_train, mdf_test):
      df[pworcolumn] = np.floor(np.log10(df[pworcolumn].values))
    
    #do same for negtempcolumn
    for df in (mdf_train, mdf_test):
      df[negtempcolumn] = np.floor(np.log10(df[negtempcolumn].values))
    
    #translate negative powers found in train set to string labels
    train_neg_dict = {}
    train_neg_labels = []
    for unique in mdf_train[negtempcolumn].unique():
      label = _neg_label(unique)
      train_neg_dict[unique] = label
      train_neg_labels.append(label)
    
    #test set only retains labels that were found in the train set
    #(label == label screens out NaN)
    test_neg_dict = {}
    for unique in mdf_test[negtempcolumn].unique():
      label = _neg_label(unique)
      if label == label and label in train_neg_labels:
        test_neg_dict[unique] = label
      else:
        test_neg_dict[unique] = np.nan
    
    mdf_train[negtempcolumn] = mdf_train[negtempcolumn].replace(train_neg_dict)
    mdf_test[negtempcolumn] = mdf_test[negtempcolumn].replace(test_neg_dict)
    
    #now do same for positive powers in the target column
    train_pos_dict = {}
    train_pos_labels = []
    for unique in mdf_train[pworcolumn].unique():
      label = _pos_label(unique)
      train_pos_dict[unique] = label
      train_pos_labels.append(label)
    
    test_pos_dict = {}
    for unique in mdf_test[pworcolumn].unique():
      label = _pos_label(unique)
      if label == label and label in train_pos_labels:
        test_pos_dict[unique] = label
      else:
        test_pos_dict[unique] = np.nan
    
    mdf_train[pworcolumn] = mdf_train[pworcolumn].replace(train_pos_dict)
    mdf_test[pworcolumn] = mdf_test[pworcolumn].replace(test_pos_dict)
    
    #combine the two columns, negative label takes precedence where present
    #(x == x is False only for NaN entries)
    mdf_train[pworcolumn] = mdf_train[negtempcolumn].where(mdf_train[negtempcolumn] == mdf_train[negtempcolumn], mdf_train[pworcolumn])
    mdf_test[pworcolumn] = mdf_test[negtempcolumn].where(mdf_test[negtempcolumn] == mdf_test[negtempcolumn], mdf_test[pworcolumn])
    
    train_unique = mdf_train[pworcolumn].unique()
    test_unique = mdf_test[pworcolumn].unique()
    
    #ordinal encoding: NaN is 0, other labels are (position in train_unique) + 1
    train_replace_dict = {}
    for i, entry in enumerate(train_unique):
      if entry != entry:
        train_replace_dict[entry] = 0
      else:
        train_replace_dict[entry] = i + 1
    if np.nan not in train_replace_dict:
      train_replace_dict.update({np.nan : 0})
      
    #test set entries not found in train set (including NaN) are encoded as 0
    test_replace_dict = {}
    for testunique in test_unique:
      if testunique in train_unique:
        test_replace_dict[testunique] = train_replace_dict[testunique]
      else:
        test_replace_dict[testunique] = 0
    
    mdf_train[pworcolumn] = mdf_train[pworcolumn].replace(train_replace_dict)
    mdf_test[pworcolumn] = mdf_test[pworcolumn].replace(test_replace_dict)
    
    #delete the negative values support column
    del mdf_train[negtempcolumn]
    del mdf_test[negtempcolumn]

    #returned data type is conditional on the size of encoding space
    bn_count = len(train_replace_dict)
    if bn_count < 254:
      ordinal_dtype = np.uint8
    elif bn_count < 65534:
      ordinal_dtype = np.uint16
    else:
      ordinal_dtype = np.uint32
      
    mdf_train[pworcolumn] = mdf_train[pworcolumn].astype(ordinal_dtype)
    mdf_test[pworcolumn] = mdf_test[pworcolumn].astype(ordinal_dtype)
   
    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []
    
    powercolumns = [pworcolumn]
    
    #driftreport metric ordl_activations_dict
    #ratio of train set rows found for each returned integer encoding
    ordl_activations_dict = {}
    for unique in mdf_train[pworcolumn].unique():
      sumcalc = (mdf_train[pworcolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[pworcolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #maps integer encoding back to label
    inverse_train_replace_dict = {value:key for key,value in train_replace_dict.items()}
    activations_list = list(inverse_train_replace_dict)
    
    for pc in powercolumns:

      powernormalization_dict = {pc : {'train_replace_dict' : train_replace_dict, \
                                       'inverse_train_replace_dict' : inverse_train_replace_dict, \
                                       'activations_list' : activations_list, \
                                       'test_replace_dict' : test_replace_dict, \
                                       'ordl_activations_dict' : ordl_activations_dict, \
                                       'negvalues' : negvalues}}
    
      column_dict = {pc : {'category' : 'pwor', \
                           'origcategory' : category, \
                           'normalization_dict' : powernormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : powercolumns, \
                           'categorylist' : powercolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}
        
      column_dict_list.append(column_dict.copy())
    
    return mdf_train, mdf_test, column_dict_list

  def _process_bins(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #bins processes a numerical set by creating bins coresponding to post z score
    #normalization of <-2, -2-1, -10, 01, 12, >2 in one hot encoded columns
    
    #bins will be intended for a raw set that is not normalized
    #bint will be intended for a previously normalized set
    
    #bins accepts a parameter bincount 
    #as integer for number of bins
    #where if bincount is an odd number the center bin straddles the mean
    #and if bincount is even the center two bins straddle the mean
    #defaults to 6 bins
    #suffix appender is '_bins_#'' where # is integer for bin id
    '''
    
    suffixoverlap_results = {}
    
    if 'bincount' in params:
      bincount = params['bincount']
    else:
      bincount = 6
    
    #if data is known to be z-score normalized we'll reduce the computational overhead
    if 'normalizedinput' in params:
      normalizedinput = params['normalizedinput']
    else:
      normalizedinput = False
    
    binscolumn = column + '_bins'
    
    if bincount > 0:

      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

      #convert all values to either numeric or NaN
      mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
      mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
      
      if normalizedinput is False:

        #get mean of training data
        mean = mdf_train[binscolumn].mean()

        if mean != mean:
          mean = 0

        #get standard deviation of training data
        std = mdf_train[binscolumn].std()

        #special case, if standard deviation is 0 we'll set it to 1 to avoid division by 0
        if std == 0:
          std = 1

        if std != std:
          std = 1
      
      else:
        mean = 0
        std = 1
      
      #replace missing data with training set mean
      mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
      mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)
      
      if normalizedinput is False:

        #z-score normalize
        mdf_train[binscolumn] = (mdf_train[binscolumn] - mean) / std
        mdf_test[binscolumn] = (mdf_test[binscolumn] - mean) / std

      #derive cuts based on bincount

      bincuts = []

      mincut = - (bincount - 2) / 2
      bincuts.append(-float('inf'))

      for i in range(bincount - 1):
        bincuts.append(mincut)
        mincut += 1

      bincuts.append(float('inf'))

      binlabels = list(range(bincount))
      binlabels = list(map(str, binlabels))

      #create bins based on standard deviation increments
  #     binscolumn = column + '_bins'
      mdf_train[binscolumn] = \
      pd.cut( mdf_train[binscolumn], bins = bincuts,  \
             labels = binlabels, precision=4)
      mdf_test[binscolumn] = \
      pd.cut( mdf_test[binscolumn], bins = bincuts,  \
             labels = binlabels, precision=4)

      #returned column headers
      textcolumns = []
      for binlabel in binlabels:
        textcolumns.append(binscolumn + '_' + binlabel)

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

      #process bins as a categorical set
      mdf_train = \
      self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      mdf_test = \
      self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})

      #change data type for memory savings
      for textcolumn in textcolumns:
        mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
        mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)

      #delete the support column
      del mdf_train[binscolumn]
      del mdf_test[binscolumn]
    
      #store some values in the nmbr_dict{} for use later in ML infill methods
      column_dict_list = []
      
      for nc in textcolumns:

        #new parameter collected for driftreport
        tc_ratio = nc + '_ratio'
        tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]

        nmbrnormalization_dict = {nc : {'bincuts' : bincuts, \
                                        'binlabels' : binlabels, \
                                        'binscolumns' : textcolumns, \
                                        'bincount' : bincount, \
                                        'binsmean' : mean, \
                                        'binsstd' : std, \
                                        'normalizedinput' : normalizedinput, \
                                        tc_ratio : tcratio}}

        column_dict = { nc : {'category' : 'bins', \
                              'origcategory' : category, \
                              'normalization_dict' : nmbrnormalization_dict, \
                              'origcolumn' : column, \
                              'inputcolumn' : column, \
                              'columnslist' : textcolumns, \
                              'categorylist' : textcolumns, \
                              'infillmodel' : False, \
                              'infillcomplete' : False, \
                              'suffixoverlap_results' : suffixoverlap_results, \
                              'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())
          
    else:
      
      column_dict_list = []

    return mdf_train, mdf_test, column_dict_list

  def _process_bsor(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #bins processes a numerical set by creating bins coresponding to post z score
    #normalization of <-2, -2-1, -10, 01, 12, >2 in one hot encoded columns
    
    #bins accepts a parameter bincount 
    #as integer for number of bins
    #where if bincount is an odd number the center bin straddles the mean
    #and if bincount is even the center two bins straddle the mean
    #defaults to 5 bins
    #suffix appender is '_bins_#'' where # is integer for bin id
    
    #bsor is comparable to bins but returns ordinal encoded column
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'bincount' in params:
      bincount = params['bincount']
    else:
      bincount = 6
      
    #if data is known to be z-score normalized we'll reduce the computational overhead
    if 'normalizedinput' in params:
      normalizedinput = params['normalizedinput']
    else:
      normalizedinput = False
    
    binscolumn = column + '_bsor'
    
    if bincount > 0:

      if inplace is not True:

        #copy source column into new column
        mdf_train, suffixoverlap_results = \
        self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

        mdf_test[binscolumn] = mdf_test[column].copy()

      else:

        suffixoverlap_results = \
        self._df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

        mdf_train.rename(columns = {column : binscolumn}, inplace = True)
        mdf_test.rename(columns = {column : binscolumn}, inplace = True)
      
      #convert all values to either numeric or NaN
      mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
      mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
      
      if normalizedinput is False:

        #get mean of training data
        mean = mdf_train[binscolumn].mean()
        if mean != mean:
          mean = 0

        #get standard deviation of training data
        std = mdf_train[binscolumn].std()
        if std == 0:
          std = 1
        if std != std:
          std = 1
          
      else:
        
        mean = 0
        std = 1

      #replace missing data with training set mean
      mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
      mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)
      
      if normalizedinput is False:

        #z-score normalize
        mdf_train[binscolumn] = (mdf_train[binscolumn] - mean) / std
        mdf_test[binscolumn] = (mdf_test[binscolumn] - mean) / std
      
      #derive cuts based on bincount

      bincuts = []

      mincut = - (bincount - 2) / 2
      bincuts.append(-float('inf'))

      for i in range(bincount - 1):
        bincuts.append(mincut)
        mincut += 1

      bincuts.append(float('inf'))

      binlabels = list(range(bincount))

  #     binscolumn = column + '_bsor'
      mdf_train[binscolumn] = \
      pd.cut( mdf_train[binscolumn], bins = bincuts,  \
             labels = binlabels, precision=4)
      mdf_test[binscolumn] = \
      pd.cut( mdf_test[binscolumn], bins = bincuts,  \
             labels = binlabels, precision=4)

      ordinal_dict = {}
      for binlabel in binlabels:
        ordinal_dict.update({str(binlabel) : binlabel})

      #new driftreport metric ordl_activations_dict
      ordl_activations_dict = {}
      for key in ordinal_dict:
        sumcalc = (mdf_train[binscolumn] == ordinal_dict[key]).sum() 
        ratio = sumcalc / mdf_train[binscolumn].shape[0]
        ordl_activations_dict.update({key:ratio})

      inverse_ordinal_dict = {value:key for key,value in ordinal_dict.items()}
      activations_list = list(inverse_ordinal_dict)

      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.int8)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.int8)

      #create list of columns
      nmbrcolumns = [binscolumn]

      #nmbrnormalization_dict = {'mean' : mean, 'std' : std}

      #store some values in the nmbr_dict{} for use later in ML infill methods
      column_dict_list = []

      for nc in nmbrcolumns:

        nmbrnormalization_dict = {nc : {'ordinal_dict' : ordinal_dict, \
                                        'inverse_ordinal_dict' : inverse_ordinal_dict, \
                                        'activations_list' : activations_list, \
                                        'ordl_activations_dict' : ordl_activations_dict, \
                                        'binsmean' : mean, \
                                        'binsstd' : std, \
                                        'normalizedinput' : normalizedinput, \
                                        'bincount' : bincount, \
                                        'bincuts' : bincuts, \
                                        'binlabels' : binlabels}}

        column_dict = { nc : {'category' : 'bsor', \
                              'origcategory' : category, \
                              'normalization_dict' : nmbrnormalization_dict, \
                              'origcolumn' : column, \
                              'inputcolumn' : column, \
                              'columnslist' : nmbrcolumns, \
                              'categorylist' : nmbrcolumns, \
                              'infillmodel' : False, \
                              'infillcomplete' : False, \
                              'suffixoverlap_results' : suffixoverlap_results, \
                              'deletecolumn' : False}}

        column_dict_list.append(column_dict.copy())

    else:
      
      column_dict_list = []
    
    return mdf_train, mdf_test, column_dict_list
  
  def _process_bnwd(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by aggregating entries into equal width bins 
    #starting from the train set minimum
    #and returning one column per bin
    #(the per bin encoding is delegated to _postprocess_textsupport)
    
    #accepts parameters:
    #'width' for the bin width, defaults to 1
    #'suffix' for the suffix appender, defaults to '_bnwd'
    
    #only bins with activations in the train set are returned as columns
    #the first and last bin are open-ended
    #missing data is infilled with the train set mean prior to binning
    
    #returned column headers are column + suffix + '_' + width + '_' + bin id
    #returned columns are cast to int8
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    '''
    
    suffixoverlap_results = {}
    
    bn_width = params.get('width', 1)
    suffix = params.get('suffix', '_bnwd')
      
    binscolumn = column + suffix

    #the support column is populated with a copy of the source column
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()

    #non-numeric entries are coerced to NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #infill is the train set mean, or 0 when the train set is all NaN
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0

    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)

    #transformation parameters are derived from the train set range
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    
    #a constant train set still gets a nonzero range
    if bn_delta == 0:
      bn_delta = 1
      
    bn_count = int(np.ceil(bn_delta / bn_width))
    
    #bin labels combine the width and the bin id
    bins_id = [str(bn_width) + '_' + str(i) for i in range(bn_count)]
      
    #interior boundaries are width increments above the train set min, tails are open-ended
    bins_cuts = \
    [-float('inf')] \
    + [bn_min + (i+1) * bn_width for i in range(bn_count-1)] \
    + [float('inf')]
      
    #aggregate entries into the equal width bins
    cut_precision = len(str(bn_count))
    
    mdf_train[binscolumn] = \
    pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=cut_precision)
    mdf_test[binscolumn] = \
    pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=cut_precision)

    #returned columns are limited to bins found in the train set
    foundinset = mdf_train[binscolumn].unique()
    
    #postprocess_textsupport  will return columns in alphabetical order
    textcolumns = sorted(binscolumn + '_' + str(i) for i in foundinset)
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)
    
    #the binned support column is processed as a categorical set
    mdf_train = \
    self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    
    #int8 for memory savings
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].astype(np.int8)
      mdf_test[textcolumn] = mdf_test[textcolumn].astype(np.int8)
    
    #the support column is no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]

    #populate a column_dict entry for each returned column
    column_dict_list = []

    for nc in textcolumns:
      
      #driftreport metric: ratio of train set rows activated in this column
      tc_ratio = nc + '_ratio'
      tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]

      nmbrnormalization_dict = {nc : {'suffix' : suffix, \
                                      'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bn_width_bnwd' : bn_width, \
                                      'textcolumns' : textcolumns, \
                                      tc_ratio : tcratio}}

      column_dict_list.append({ nc : {'category' : 'bnwd', \
                                      'origcategory' : category, \
                                      'normalization_dict' : nmbrnormalization_dict, \
                                      'origcolumn' : column, \
                                      'inputcolumn' : column, \
                                      'columnslist' : textcolumns, \
                                      'categorylist' : textcolumns, \
                                      'infillmodel' : False, \
                                      'infillcomplete' : False, \
                                      'suffixoverlap_results' : suffixoverlap_results, \
                                      'deletecolumn' : False}})
       
    return mdf_train, mdf_test, column_dict_list

  def _process_bnwo(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by aggregating entries into equal width bins 
    #starting from the train set minimum
    #and returning a single ordinal encoded column
    
    #accepts parameters:
    #'inplace' boolean defaulting to False, when True the source column is 
    #renamed to the returned header instead of being copied
    #'width' for the bin width, defaults to 1
    #'suffix' for the suffix appender, defaults to '_bnwo'
    
    #integer encodings are the bin ids counted up from the train set minimum
    #so bins without activations in the train set still have an encoding
    #the first and last bin are open-ended
    #missing data is infilled with the train set mean prior to binning
    
    #returned data type is uint8 / uint16 / uint32 based on the number of bins
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    bn_width = params.get('width', 1)
    suffix = params.get('suffix', '_bnwo')
      
    binscolumn = column + suffix

    if inplace is True:

      #the source column is renamed after checking the new header for overlap
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    else:

      #the source column is copied into the new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()
    
    #non-numeric entries are coerced to NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #infill is the train set mean, or 0 when the train set is all NaN
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0

    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)

    #transformation parameters are derived from the train set range
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min
    
    #a constant train set still gets a nonzero range
    if bn_delta == 0:
      bn_delta = 1
      
    bn_count = int(np.ceil(bn_delta / bn_width))
    
    #bin labels are the integer bin ids, which serve as the ordinal encoding
    bins_id = list(range(bn_count))
      
    #interior boundaries are width increments above the train set min, tails are open-ended
    bins_cuts = \
    [-float('inf')] \
    + [bn_min + (i+1) * bn_width for i in range(bn_count-1)] \
    + [float('inf')]
      
    #aggregate entries into the equal width bins
    cut_precision = len(str(bn_count))
    
    mdf_train[binscolumn] = \
    pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=cut_precision)
    mdf_test[binscolumn] = \
    pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=cut_precision)
    
    #returned data type is conditional on the size of encoding space
    if bn_count < 254:
      ordinal_dtype = np.uint8
    elif bn_count < 65534:
      ordinal_dtype = np.uint16
    else:
      ordinal_dtype = np.uint32
      
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(ordinal_dtype)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(ordinal_dtype)

    #list of returned columns
    nmbrcolumns = [binscolumn]
    
    #driftreport metric ordl_activations_dict
    #ratio of train set rows found for each encoding present in the train set
    ordl_activations_dict = {}
    train_rowcount = mdf_train[binscolumn].shape[0]
    for unique in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == unique).sum() 
      ordl_activations_dict.update({unique : sumcalc / train_rowcount})

    activations_list = list(ordl_activations_dict)

    #populate a column_dict entry for each returned column
    column_dict_list = []

    for nc in nmbrcolumns:

      nmbrnormalization_dict = {nc : {'suffix' : suffix, \
                                      'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bn_width' : bn_width, \
                                      'activations_list' : activations_list, \
                                      'ordl_activations_dict' : ordl_activations_dict}}

      column_dict_list.append({ nc : {'category' : 'bnwo', \
                                      'origcategory' : category, \
                                      'normalization_dict' : nmbrnormalization_dict, \
                                      'origcolumn' : column, \
                                      'inputcolumn' : column, \
                                      'columnslist' : nmbrcolumns, \
                                      'categorylist' : nmbrcolumns, \
                                      'infillmodel' : False, \
                                      'infillcomplete' : False, \
                                      'suffixoverlap_results' : suffixoverlap_results, \
                                      'deletecolumn' : False}})

    return mdf_train, mdf_test, column_dict_list

  def _process_bnep(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #equal population binning of a numeric column, returned as a one-hot encoded set
    #
    #entries read from params:
    #  'bincount' - number of quantile bins requested from pd.qcut, defaults to 5
    #  'suffix'   - appended to column to name the returned columns, defaults to '_bnep'
    #
    #bin boundaries are derived from the train set only and then applied to both
    #train and test, with the lowest bin extended to -inf and the highest to +inf
    #
    #entries that can't be coerced to numeric are treated as NaN
    #(original notes: default infill is to have no activations in a row)
    #
    #a column is returned for each bin that had at least one entry in the train set,
    #named column + suffix + '_' + bin number
    #
    #when the train set has no spread (max - min not greater than 0, which also covers
    #an all NaN train column) a single all zero column named column + suffix is returned
    #
    #can be applied to either a raw set or a previously normalized set
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''

    suffixoverlap_results = {}

    bincount = params['bincount'] if 'bincount' in params else 5
    suffix = params['suffix'] if 'suffix' in params else '_bnep'

    binscolumn = column + suffix

    #the train copy is routed through the helper that also logs suffix overlap
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

    mdf_test[binscolumn] = mdf_test[column].copy()

    #anything non-numeric becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is only recorded in the normalization dictionary
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    #train set range
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min

    has_spread = bn_delta > 0 and bn_min == bn_min

    if not has_spread:

      #nothing to bin, so return a single column of zeros
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0

      textcolumns = [binscolumn]

      bn_count = bincount
      bins_id = False
      bins_cuts = False

    else:

      #equal population intervals evaluated on the train set
      #(duplicate edges are dropped so fewer than bincount intervals may be returned)
      quantile_intervals = \
      pd.qcut(mdf_train[binscolumn], bincount, duplicates='drop').unique()

      #drop the nan entry (if any) and put intervals in ascending order
      quantile_intervals = sorted([iv for iv in quantile_intervals if iv == iv])

      #open-ended replacements for the two outer intervals
      lowest = pd.Interval(-np.inf, quantile_intervals[0].right, closed='right')
      highest = pd.Interval(quantile_intervals[-1].left, np.inf, closed='right')

      last_idx = len(quantile_intervals) - 1

      newinterval_list = []
      for idx, iv in enumerate(quantile_intervals):
        if idx == 0:
          newinterval_list.append(lowest)
        elif idx == last_idx:
          newinterval_list.append(highest)
        else:
          newinterval_list.append(iv)

      #boundaries for the cut operation are the left edges followed by +inf
      bins_cuts = [iv.left for iv in newinterval_list]
      bins_cuts.append(np.inf)

      bn_count = len(newinterval_list)

      #bins are labeled with their integer position as a string
      bins_id = [str(position) for position in range(bn_count)]

      #assign each entry to its bin using the boundaries derived from the train set
      label_precision = len(str(bn_count))
      mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn],
                                     bins = bins_cuts,
                                     labels = bins_id,
                                     precision = label_precision,
                                     duplicates = 'drop')
      mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn],
                                    bins = bins_cuts,
                                    labels = bins_id,
                                    precision = label_precision,
                                    duplicates = 'drop')

      #returned columns are limited to bins found in the train set
      #sorted since postprocess_textsupport will return columns in alphabetical order
      textcolumns = sorted(binscolumn + '_' + str(label)
                           for label in mdf_train[binscolumn].unique()
                           if label == label)

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

      #process bins as a categorical set
      textsupport_params = {'textcolumns':textcolumns}
      mdf_train = \
      self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', textsupport_params)
      mdf_test = \
      self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})

      #activations stored as int8 for memory savings
      for onehot_column in textcolumns:
        mdf_train[onehot_column] = mdf_train[onehot_column].astype(np.int8)
        mdf_test[onehot_column] = mdf_test[onehot_column].astype(np.int8)

      #the support column is no longer needed
      del mdf_train[binscolumn]
      del mdf_test[binscolumn]

    #one column_dict entry is assembled for each returned column
    column_dict_list = []

    for returned_column in textcolumns:

      #ratio of activations in train set, collected for driftreport
      ratio_key = returned_column + '_ratio'
      ratio_value = mdf_train[returned_column].sum() / mdf_train[returned_column].shape[0]

      nmbrnormalization_dict = {returned_column : {'suffix' : suffix,
                                                   'binsmean' : mean,
                                                   'bn_min' : bn_min,
                                                   'bn_max' : bn_max,
                                                   'bn_delta' : bn_delta,
                                                   'bn_count' : bn_count,
                                                   'bins_id' : bins_id,
                                                   'bins_cuts' : bins_cuts,
                                                   'bincount_bnep' : bincount,
                                                   'textcolumns' : textcolumns,
                                                   ratio_key : ratio_value}}

      column_dict = {returned_column : {'category' : 'bnep',
                                        'origcategory' : category,
                                        'normalization_dict' : nmbrnormalization_dict,
                                        'origcolumn' : column,
                                        'inputcolumn' : column,
                                        'columnslist' : textcolumns,
                                        'categorylist' : textcolumns,
                                        'infillmodel' : False,
                                        'infillcomplete' : False,
                                        'suffixoverlap_results' : suffixoverlap_results,
                                        'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list

  def _process_bneo(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating equal population bins corresponding to 
    #parameter 'bincount' which defaults to 5
    #and returning in ordinal encoded set (a single column of integer bin numbers)
    
    #entries read from params:
    #  'inplace'  - when True the source column is renamed instead of copied, defaults to False
    #  'bincount' - number of quantile bins requested from pd.qcut, defaults to 5
    #  'suffix'   - appended to column to name the returned column, defaults to '_bneo'
    
    #bin boundaries are derived from the train set only and applied to both sets,
    #with the lowest bin extended to -inf and the highest to +inf
    
    #default infill is adjacent cell infill (forward fill, then backward fill,
    #then a 0 plug if nothing else was available)
    
    #when the train set has no spread (max - min not greater than 0, which also covers
    #an all NaN train column) the returned column is all zeros
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'bincount' in params:
      bincount = params['bincount']
    else:
      bincount = 5
      
    if 'suffix' in params:
      suffix = params['suffix']
    else:
      suffix = '_bneo'
      
    binscolumn = column + suffix

    if inplace is not True:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

    else:

      #inplace case: check for overlap with the new header, then rename the source column
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #get mean of training data (only recorded in the normalization dictionary)
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0

    #evaluate train set for transformation parameters
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min

    #bn_min == bn_min is False when train set was all NaN
    if bn_delta > 0 and bn_min == bn_min:

      #grab the intervals using qcut based on equal population in train set
      #(duplicate edges are dropped so fewer than bincount intervals may be returned)
      intervalset = pd.qcut(mdf_train[binscolumn], bincount, duplicates='drop').unique()

      #note we're sorting here, and scrubbing any nan
      intervalset = sorted([interval for interval in intervalset if interval == interval])

      #we'll make the bottom interval open-ended at negative end
      firstinterval = pd.Interval(-np.inf, intervalset[0].right, closed='right')

      #and the last interval open-ended at positive end
      lastinterval = pd.Interval(intervalset[-1].left, np.inf, closed='right')

      #now we'll assemble a list of intervals to prepare for the cut operation, 
      #replacing first and last with the open-ended
      last_position = len(intervalset) - 1
      newinterval_list = []
      for i, interval in enumerate(intervalset):
        if i == 0:
          newinterval_list.append(firstinterval)
        elif i == last_position:
          newinterval_list.append(lastinterval)
        else:
          newinterval_list.append(interval)

      #now translate intervals to the list of boundaries for cut operation
      #(left edge of each interval followed by +inf)
      cutintervals = [interval.left for interval in newinterval_list]
      cutintervals.append(np.inf)

      bn_count = len(newinterval_list)

      #ordinal labels are the integer bin positions
      bins_id = list(range(bn_count))

      bins_cuts = cutintervals

      #create bins based on prepared increments
      mdf_train[binscolumn] = \
      pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')
      mdf_test[binscolumn] = \
      pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')

      #apply ffill to replace NArows with value from adjacent cell in preceding row
      #(Series.ffill / Series.bfill are used since fillna(method=...) is deprecated in pandas)
      mdf_train[binscolumn] = mdf_train[binscolumn].ffill()
      mdf_test[binscolumn] = mdf_test[binscolumn].ffill()

      #we'll follow with a bfill just in case first row had a nan
      mdf_train[binscolumn] = mdf_train[binscolumn].bfill()
      mdf_test[binscolumn] = mdf_test[binscolumn].bfill()

      #and if the entire set was nan we'll infill with a 0 plug
      mdf_train[binscolumn] = mdf_train[binscolumn].fillna(0)
      mdf_test[binscolumn] = mdf_test[binscolumn].fillna(0)
      
      #returned data type is conditional on the size of encoding space
      if bn_count < 254:
        encoding_dtype = np.uint8
      elif bn_count < 65534:
        encoding_dtype = np.uint16
      else:
        encoding_dtype = np.uint32

      mdf_train[binscolumn] = mdf_train[binscolumn].astype(encoding_dtype)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(encoding_dtype)
      
    else:
      
      #no spread in train set, return a column of zeros
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0
      
      bn_count = bincount
      bins_id = False
      bins_cuts = False

    #create list of columns
    nmbrcolumns = [binscolumn]

    #driftreport metric ordl_activations_dict
    #maps each encoding found in train set to the ratio of rows carrying it
    ordl_activations_dict = {}
    for unique in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[binscolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      nmbrnormalization_dict = {nc : {'suffix' : suffix, \
                                      'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'activations_list' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bincount' : bincount, \
                                      'ordl_activations_dict' : ordl_activations_dict}}

      column_dict = { nc : {'category' : 'bneo', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : nmbrcolumns, \
                            'categorylist' : nmbrcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def _process_tlbn(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating equal population bins corresponding to 
    #parameter 'bincount' which defaults to 9
    #and returning one column per bin

    #as alternate to specifying bincount for equal population bins
    #can also pass parameter 'buckets' as list of bucket boundaries
    #leaving out -/+ inf in first and last bins as will be added
    #(when buckets is passed as a list it overrides bincount)
    
    #how this differs from bnep is that the activated bins are replaced with
    #min-max scaling for source column values found in that bin, and then other values as -1
    #  bottom bin: (upper boundary - value) / (upper boundary - train min)
    #  top bin:    (value - lower boundary) / (train max - lower boundary)
    #  other bins: (value - lower boundary) / (upper boundary - lower boundary)
    
    #note that for the bottom bin order reversed to accommodate subsequent values out of range 
    #and still use -1 register
    
    #returned columns are named column + '_tlbn_' + bin number, one for every bin
    #(including bins without activations in the train set)
    
    #the normalization dictionary entry 'bincount' records the number of bins realized
    #(which may be fewer than requested when qcut drops duplicate edges)
    #while 'bincount_tlbn' records the bincount that was requested
    
    #when the train set has no spread (max - min not greater than 0, which also covers
    #an all NaN train column) a single all zero column named column + '_tlbn' is returned
    
    #can be applied to either a raw set not yet normalized or after normalization
    #(such as after z-score normalization)
    
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    if 'bincount' in params:
      bincount = params['bincount']
    else:
      bincount = 9
    bincount_orig = bincount
      
    #if buckets is passed as list of boundaries, it overrides bincount
    #note that should leave out -/+ inf in first and last bins (will be added)
    if 'buckets' in params:
      buckets = params['buckets']
    else:
      buckets = False

    binscolumn = column + '_tlbn'

    #copy original column
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)
    
    mdf_test[binscolumn] = mdf_test[column].copy()

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #get mean of training data (only recorded in the normalization dictionary)
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0

    #evaluate train set for transformation parameters
    bn_min = mdf_train[binscolumn].min()
    bn_max = mdf_train[binscolumn].max()
    bn_delta = bn_max - bn_min

    #bn_min == bn_min is False when train set was all NaN
    if bn_delta > 0 and bn_min == bn_min:
      
      if not isinstance(buckets, list):
        #grab the intervals using qcut based on equal population in train set
        #(duplicate edges are dropped so fewer than bincount intervals may be returned)
        intervalset = pd.qcut(mdf_train[binscolumn], bincount, duplicates='drop').unique()
      else:
        intervalset = []
        #to be sort of consistent with what is returned from qcut
        #with difference that we're allowing buckets outside of range found in set
        last_bucket = len(buckets) - 1
        for i in range(len(buckets)):
          if i == 0:
            #buckets[i]-1 will be replaced with -inf below
            intervalset.append(pd.Interval(buckets[i]-1, buckets[i]))
          else:
            intervalset.append(pd.Interval(buckets[i-1], buckets[i]))
          if i == last_bucket:
            #buckets[i]+1 will be replaced with +inf below
            #(checked independently of the i == 0 case so that a single boundary
            #still produces both a lower and an upper bin)
            intervalset.append(pd.Interval(buckets[i], buckets[i]+1))

      #note we're sorting here, and scrubbing any nan
      intervalset = sorted([interval for interval in intervalset if interval == interval])

      #we'll make the bottom interval open-ended at negative end
      firstinterval = pd.Interval(-np.inf, intervalset[0].right, closed='right')

      #and the last interval open-ended at positive end
      lastinterval = pd.Interval(intervalset[-1].left, np.inf, closed='right')

      #now we'll assemble a list of intervals to prepare for the cut operation, 
      #replacing first and last with the open-ended
      last_position = len(intervalset) - 1
      newinterval_list = []
      for i, interval in enumerate(intervalset):
        if i == 0:
          newinterval_list.append(firstinterval)
        elif i == last_position:
          newinterval_list.append(lastinterval)
        else:
          newinterval_list.append(interval)

      #now translate intervals to the list of boundaries for cut operation
      #(left edge of each interval followed by +inf)
      cutintervals = [interval.left for interval in newinterval_list]
      cutintervals.append(np.inf)

      bn_count = len(newinterval_list)

      #bincount is synced to the number of bins actually realized
      #(qcut may have dropped duplicate edges), so that the top bin identified below
      #and the 'bincount' recorded in the normalization dictionary agree with the
      #columns returned; the requested value remains available as bincount_orig
      bincount = bn_count

      bins_id = [str(i) for i in range(bn_count)]

      bins_cuts = cutintervals

      #assign entries to bins based on the prepared boundaries
      mdf_train[binscolumn] = \
      pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')
      mdf_test[binscolumn] = \
      pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
             labels = bins_id, precision=len(str(bn_count)), duplicates='drop')

      #a column is returned for every bin, including those without train set activations
      textcolumns = [binscolumn + '_' + str(i) for i in range(bn_count)]

      #postprocess_textsupport  will return columns in alphabetical order
      textcolumns.sort()
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

      #process bins as a categorical set
      mdf_train = \
      self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      mdf_test = \
      self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
      
      #initialize binscolumn once more, now as numeric source values for the scaling
      mdf_train[binscolumn] = mdf_train[column].copy()
      mdf_test[binscolumn] = mdf_test[column].copy()
      
      mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
      mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
      
      #with a single bin the activations are left as returned from the one-hot step
      if len(textcolumns) > 1:

        for i in range(len(textcolumns)):

          tlbn_column = binscolumn + '_' + str(i)

          for df in (mdf_train, mdf_test):

            if i == 0:

              #bottom bin, order reversed so the train min maps to 1 and the boundary to 0
              scaled = (bins_cuts[i+1] - df[binscolumn]) / (bins_cuts[i+1] - bn_min)

            elif i == bn_count - 1:

              #top bin, upper boundary is +inf so the train max is used for scaling
              scaled = (df[binscolumn] - bins_cuts[i]) / (bn_max - bins_cuts[i])

            else:

              #interior bin, scaled between its two boundaries
              scaled = (df[binscolumn] - bins_cuts[i]) / (bins_cuts[i+1] - bins_cuts[i])

            #rows not activated for this bin receive -1
            df[tlbn_column] = np.where(df[tlbn_column] == 1, scaled, -1)

      #delete the support column
      del mdf_train[binscolumn]
      del mdf_test[binscolumn]
      
    else:
      
      #no spread in train set, return a single column of zeros
      mdf_train[binscolumn] = 0
      mdf_test[binscolumn] = 0
      
      textcolumns = [binscolumn]
      
      bn_count = bincount
      bins_id = False
      bins_cuts = False

    #there's still an edge scenario for custom buckets where nan gets populated, 
    #so this is just a hack to clean up
    for textcolumn in textcolumns:
      mdf_train[textcolumn] = mdf_train[textcolumn].fillna(-1)
      mdf_test[textcolumn] = mdf_test[textcolumn].fillna(-1)

    #store some values in the column_dict for use later in ML infill methods
    column_dict_list = []

    for nc in textcolumns:

      #new parameter collected for driftreport
      tc_ratio = nc + '_ratio'
      tcratio = mdf_train[nc].sum() / mdf_train[nc].shape[0]

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'bn_min' : bn_min, \
                                      'bn_max' : bn_max, \
                                      'bn_delta' : bn_delta, \
                                      'bn_count' : bn_count, \
                                      'bins_id' : bins_id, \
                                      'bins_cuts' : bins_cuts, \
                                      'bincount' : bincount, \
                                      'bincount_tlbn' : bincount_orig, \
                                      'textcolumns' : textcolumns, \
                                      tc_ratio : tcratio}}

      column_dict = { nc : {'category' : 'tlbn', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : textcolumns, \
                            'categorylist' : textcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
       
    return mdf_train, mdf_test, column_dict_list
  
  def _process_bkt1(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #custom bucket binning of a numeric column, returned as a one-hot encoded set
    #with the first and last buckets unconstrained (extended to -inf / +inf)
    #
    #entries read from params:
    #  'buckets' - bucket boundaries, defaults to [0,1,2]
    #              passed as a list the values are used directly as boundaries
    #              passed as a set the values are treated as fractions of the
    #              train set range, i.e. boundary = (max - min) * x + min
    #
    #entries that can't be coerced to numeric are treated as NaN
    #
    #a column is returned only for buckets with activations in the train set,
    #named column + '_bkt1_' + bucket number
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''

    suffixoverlap_results = {}

    #origbuckets retains what was specified, buckets may be converted below
    if 'buckets' in params:
      buckets = origbuckets = params['buckets']
    else:
      buckets, origbuckets = [0,1,2], [0,1,2]

    binscolumn = column + '_bkt1'

    #the train copy is routed through the helper that also logs suffix overlap
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

    mdf_test[binscolumn] = mdf_test[column].copy()

    #anything non-numeric becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is only recorded in the normalization dictionary
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    trainmax = mdf_train[binscolumn].max()
    trainmin = mdf_train[binscolumn].min()

    #a set (as opposed to a list) signals buckets given as fractions of train set range
    if type(buckets) is set:

      #NaN max / min (all NaN train set) are replaced with 0 before scaling
      if trainmax != trainmax:
        trainmax = 0
      if trainmin != trainmin:
        trainmin = 0

      train_range = trainmax - trainmin
      buckets = [train_range * fraction + trainmin for fraction in sorted(buckets)]

    #boundaries for the cut operation are the buckets bracketed by -inf and +inf
    bins_cuts = buckets.copy()
    bins_cuts.insert(0, -np.inf)
    bins_cuts.append(np.inf)

    #buckets are labeled with their integer position
    bins_id = list(range(len(bins_cuts)-1))

    #assign each entry to its bucket
    label_precision = len(str(len(bins_id)))
    mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn],
                                   bins = bins_cuts,
                                   labels = bins_id,
                                   precision = label_precision)
    mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn],
                                  bins = bins_cuts,
                                  labels = bins_id,
                                  precision = label_precision)

    #returned columns are limited to buckets found in the train set
    candidate_columns = [binscolumn + '_' + str(label)
                         for label in mdf_train[binscolumn].unique()]

    #the nan entry (values that did not fall within a bucket) is dropped
    #sorted since postprocess_textsupport will return columns in alphabetical order
    textcolumns = sorted(name for name in candidate_columns if name[-3:] != 'nan')

    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

    #process buckets as a categorical set
    mdf_train = \
    self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})

    #activations stored as int8 for memory savings
    for onehot_column in textcolumns:
      mdf_train[onehot_column] = mdf_train[onehot_column].astype(np.int8)
      mdf_test[onehot_column] = mdf_test[onehot_column].astype(np.int8)

    #the support column is no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]

    #one column_dict entry is assembled for each returned column
    column_dict_list = []

    for returned_column in textcolumns:

      #ratio of activations in train set, collected for driftreport
      ratio_key = returned_column + '_ratio'
      ratio_value = mdf_train[returned_column].sum() / mdf_train[returned_column].shape[0]

      nmbrnormalization_dict = {returned_column : {'binsmean' : mean,
                                                   'buckets_bkt1' : buckets,
                                                   'bins_cuts' : bins_cuts,
                                                   'bins_id' : bins_id,
                                                   'textcolumns' : textcolumns,
                                                   ratio_key : ratio_value,
                                                   'trainmax' : trainmax,
                                                   'trainmin' : trainmin,
                                                   'origbuckets_bkt1' : origbuckets}}

      column_dict = {returned_column : {'category' : 'bkt1',
                                        'origcategory' : category,
                                        'normalization_dict' : nmbrnormalization_dict,
                                        'origcolumn' : column,
                                        'inputcolumn' : column,
                                        'columnslist' : textcolumns,
                                        'categorylist' : textcolumns,
                                        'infillmodel' : False,
                                        'infillcomplete' : False,
                                        'suffixoverlap_results' : suffixoverlap_results,
                                        'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def _process_bkt2(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #custom bucket binning of a numeric column, returned as a one-hot encoded set
    #with the first and last buckets bounded (boundaries are used as passed,
    #without adding -inf / +inf)
    #
    #entries read from params:
    #  'buckets' - bucket boundaries, defaults to [0,1,2]
    #              passed as a list the values are used directly as boundaries
    #              passed as a set the values are treated as fractions of the
    #              train set range, i.e. boundary = (max - min) * x + min
    #
    #entries that can't be coerced to numeric are treated as NaN
    #
    #a column is returned only for buckets with activations in the train set,
    #named column + '_bkt2_' + bucket number
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''

    suffixoverlap_results = {}

    #origbuckets retains what was specified, buckets may be converted below
    if 'buckets' in params:
      buckets = origbuckets = params['buckets']
    else:
      buckets, origbuckets = [0,1,2], [0,1,2]

    binscolumn = column + '_bkt2'

    #the train copy is routed through the helper that also logs suffix overlap
    mdf_train, suffixoverlap_results = \
    self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

    mdf_test[binscolumn] = mdf_test[column].copy()

    #anything non-numeric becomes NaN
    for df in (mdf_train, mdf_test):
      df[binscolumn] = pd.to_numeric(df[binscolumn], errors='coerce')

    #train set mean is only recorded in the normalization dictionary
    mean = mdf_train[binscolumn].mean()
    if mean != mean:
      mean = 0

    trainmax = mdf_train[binscolumn].max()
    trainmin = mdf_train[binscolumn].min()

    #a set (as opposed to a list) signals buckets given as fractions of train set range
    if type(buckets) is set:

      #NaN max / min (all NaN train set) are replaced with 0 before scaling
      if trainmax != trainmax:
        trainmax = 0
      if trainmin != trainmin:
        trainmin = 0

      train_range = trainmax - trainmin
      buckets = [train_range * fraction + trainmin for fraction in sorted(buckets)]

    #boundaries for the cut operation are the buckets as given
    bins_cuts = buckets.copy()

    #buckets are labeled with their integer position
    bins_id = list(range(len(bins_cuts)-1))

    #assign each entry to its bucket
    label_precision = len(str(len(bins_id)))
    mdf_train[binscolumn] = pd.cut(mdf_train[binscolumn],
                                   bins = bins_cuts,
                                   labels = bins_id,
                                   precision = label_precision)
    mdf_test[binscolumn] = pd.cut(mdf_test[binscolumn],
                                  bins = bins_cuts,
                                  labels = bins_id,
                                  precision = label_precision)

    #returned columns are limited to buckets found in the train set
    candidate_columns = [binscolumn + '_' + str(label)
                         for label in mdf_train[binscolumn].unique()]

    #the nan entry (values that did not fall within a bucket) is dropped
    #sorted since postprocess_textsupport will return columns in alphabetical order
    textcolumns = sorted(name for name in candidate_columns if name[-3:] != 'nan')

    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, textcolumns, suffixoverlap_results)

    #process buckets as a categorical set
    mdf_train = \
    self._postprocess_textsupport(mdf_train, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})
    mdf_test = \
    self._postprocess_textsupport(mdf_test, binscolumn, {}, 'tempkey', {'textcolumns':textcolumns})

    #activations stored as int8 for memory savings
    for onehot_column in textcolumns:
      mdf_train[onehot_column] = mdf_train[onehot_column].astype(np.int8)
      mdf_test[onehot_column] = mdf_test[onehot_column].astype(np.int8)

    #the support column is no longer needed
    del mdf_train[binscolumn]
    del mdf_test[binscolumn]

    #one column_dict entry is assembled for each returned column
    column_dict_list = []

    for returned_column in textcolumns:

      #ratio of activations in train set, collected for driftreport
      ratio_key = returned_column + '_ratio'
      ratio_value = mdf_train[returned_column].sum() / mdf_train[returned_column].shape[0]

      nmbrnormalization_dict = {returned_column : {'binsmean' : mean,
                                                   'buckets_bkt2' : buckets,
                                                   'bins_cuts' : bins_cuts,
                                                   'bins_id' : bins_id,
                                                   'textcolumns' : textcolumns,
                                                   ratio_key : ratio_value,
                                                   'trainmax' : trainmax,
                                                   'trainmin' : trainmin,
                                                   'origbuckets_bkt2' : origbuckets}}

      column_dict = {returned_column : {'category' : 'bkt2',
                                        'origcategory' : category,
                                        'normalization_dict' : nmbrnormalization_dict,
                                        'origcolumn' : column,
                                        'inputcolumn' : column,
                                        'columnslist' : textcolumns,
                                        'categorylist' : textcolumns,
                                        'infillmodel' : False,
                                        'infillcomplete' : False,
                                        'suffixoverlap_results' : suffixoverlap_results,
                                        'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def _process_bkt3(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating custom bins coresponding to 
    #parameter 'buckets' which defaults to [0,1]
    #first and last buckets unconstrained
    #and returning in ordinal encoded set
    
    #segments without activations are included
    
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'buckets' in params:
        
      buckets = params['buckets']
    
    else:
      
      buckets = [0,1,2]
      
    binscolumn = column + '_bkt3'

    if inplace is not True:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

    else:

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')

    #get mean of training data
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0
      
    trainmax = mdf_train[binscolumn].max()
    trainmin = mdf_train[binscolumn].min()
    
    #if buckets is a set instead of list that signals to convert percentages to values
    if type(buckets) == type({1,2}):
      buckets = sorted(list(buckets))
      
      if trainmax != trainmax:
        trainmax = 0
      if trainmin != trainmin:
        trainmin = 0
        
      buckets = [(trainmax - trainmin) * x + trainmin for x in buckets]

    # #replace missing data with training set mean
    # mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
    # mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)

    #assemble buckets  
    bins_cuts = buckets.copy()
    bins_cuts.insert(0, -np.inf)
    bins_cuts.insert(len(bins_cuts), np.inf)
    
    #create labels for bins
    bins_id = list(range(len(bins_cuts)-1))

    infill_activation = len(bins_cuts)-1
      
    #create bins based on standard deviation increments
#     binscolumn = column + '_bnwo'
    mdf_train[binscolumn] = \
    pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))
    mdf_test[binscolumn] = \
    pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))

    mdf_train[binscolumn] = mdf_train[binscolumn].astype(float)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(float)
    
    #replace missing data with infill_activation
    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(infill_activation)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(infill_activation)
    
    #returned data type is conditional on the size of encoding space
    if len(bins_cuts) < 254:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint8)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint8)
    elif len(bins_cuts) < 65534:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint16)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint16)
    else:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint32)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint32)

    #create list of columns
    nmbrcolumns = [binscolumn]
    
    #new driftreport metric ordl_activations_dict
    ordl_activations_dict = {}
    for unique in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[binscolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #nmbrnormalization_dict = {'mean' : mean, 'std' : std}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'buckets' : buckets, \
                                      'bins_cuts' : bins_cuts, \
                                      'bins_id' : bins_id, \
                                      'activations_list' : bins_id, \
                                      'infill_activation' : infill_activation, \
                                      'ordl_activations_dict' : ordl_activations_dict, \
                                      'trainmax' : trainmax, \
                                      'trainmin' : trainmin}}

      column_dict = { nc : {'category' : 'bkt3', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : nmbrcolumns, \
                            'categorylist' : nmbrcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list
  
  def _process_bkt4(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #processes a numerical set by creating custom bins coresponding to 
    #parameter 'buckets' which defaults to [0,1]
    #first and last buckets bounded
    #and returning in ordinal encoded set
    
    #segments without activations are included
    '''
    
    suffixoverlap_results = {}
    
    if 'inplace' in params:
      inplace = params['inplace']
    else:
      inplace = False
    
    if 'buckets' in params:
        
      buckets = params['buckets']
    
    else:
      
      buckets = [0,1,2]
      
    binscolumn = column + '_bkt4'

    if inplace is not True:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, binscolumn, suffixoverlap_results)

      mdf_test[binscolumn] = mdf_test[column].copy()

    else:

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, binscolumn, suffixoverlap_results)

      mdf_train.rename(columns = {column : binscolumn}, inplace = True)
      mdf_test.rename(columns = {column : binscolumn}, inplace = True)

    #convert all values to either numeric or NaN
    mdf_train[binscolumn] = pd.to_numeric(mdf_train[binscolumn], errors='coerce')
    mdf_test[binscolumn] = pd.to_numeric(mdf_test[binscolumn], errors='coerce')
    
    trainmax = mdf_train[binscolumn].max()
    trainmin = mdf_train[binscolumn].min()
    
    #if buckets is a set instead of list that signals to convert percentages to values
    if type(buckets) == type({1,2}):
      buckets = sorted(list(buckets))
      
      if trainmax != trainmax:
        trainmax = 0
      if trainmin != trainmin:
        trainmin = 0
        
      buckets = [(trainmax - trainmin) * x + trainmin for x in buckets]
    
    #set all values that fall outside of bounded buckets to nan for replacement with mean
    mdf_train.loc[mdf_train[binscolumn] <= buckets[0], (binscolumn)] = np.nan
    mdf_test.loc[mdf_test[binscolumn] <= buckets[0], (binscolumn)] = np.nan
    
    mdf_train.loc[mdf_train[binscolumn] > buckets[-1], (binscolumn)] = np.nan
    mdf_test.loc[mdf_test[binscolumn] > buckets[-1], (binscolumn)] = np.nan

    #get mean of training data
    mean = mdf_train[binscolumn].mean()
    
    if mean != mean:
      mean = 0
      
    #edge case, if mean does nto fall within buckets range, we'll apply infill to top bucket
    #this edge case specific to bkt4
    #this assumes buckets was passed with sorted values
    if mean < buckets[0] or mean > buckets[-1]:
      mean = buckets[-1]

    # #replace missing data with training set mean
    # mdf_train[binscolumn] = mdf_train[binscolumn].fillna(mean)
    # mdf_test[binscolumn] = mdf_test[binscolumn].fillna(mean)

    #assemble buckets  
    bins_cuts = buckets.copy()
#     bins_cuts.insert(0, -np.inf)
#     bins_cuts.insert(len(bins_cuts), np.inf)

    #create labels for bins
    bins_id = list(range(len(bins_cuts)-1))

    infill_activation = len(bins_cuts)-1
      
    #create bins based on standard deviation increments
#     binscolumn = column + '_bnwo'
    mdf_train[binscolumn] = \
    pd.cut(mdf_train[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))
    mdf_test[binscolumn] = \
    pd.cut(mdf_test[binscolumn], bins = bins_cuts,  \
           labels = bins_id, precision=len(str(len(bins_id))))
    
    mdf_train[binscolumn] = mdf_train[binscolumn].astype(float)
    mdf_test[binscolumn] = mdf_test[binscolumn].astype(float)
    
    #replace missing data with infill_activation
    mdf_train[binscolumn] = mdf_train[binscolumn].fillna(infill_activation)
    mdf_test[binscolumn] = mdf_test[binscolumn].fillna(infill_activation)

    #returned data type is conditional on the size of encoding space
    if len(bins_cuts) < 254:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint8)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint8)
    elif len(bins_cuts) < 65534:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint16)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint16)
    else:
      mdf_train[binscolumn] = mdf_train[binscolumn].astype(np.uint32)
      mdf_test[binscolumn] = mdf_test[binscolumn].astype(np.uint32)

    #create list of columns
    nmbrcolumns = [binscolumn]
    
    #new driftreport metric ordl_activations_dict
    ordl_activations_dict = {}
    for unique in mdf_train[binscolumn].unique():
      sumcalc = (mdf_train[binscolumn] == unique).sum() 
      ratio = sumcalc / mdf_train[binscolumn].shape[0]
      ordl_activations_dict.update({unique:ratio})

    #nmbrnormalization_dict = {'mean' : mean, 'std' : std}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      nmbrnormalization_dict = {nc : {'binsmean' : mean, \
                                      'buckets' : buckets, \
                                      'bins_cuts' : bins_cuts, \
                                      'bins_id' : bins_id, \
                                      'activations_list' : bins_id, \
                                      'infill_activation' : infill_activation, \
                                      'ordl_activations_dict' : ordl_activations_dict, \
                                      'trainmax' : trainmax, \
                                      'trainmin' : trainmin}}

      column_dict = { nc : {'category' : 'bkt4', \
                            'origcategory' : category, \
                            'normalization_dict' : nmbrnormalization_dict, \
                            'origcolumn' : column, \
                            'inputcolumn' : column, \
                            'columnslist' : nmbrcolumns, \
                            'categorylist' : nmbrcolumns, \
                            'infillmodel' : False, \
                            'infillcomplete' : False, \
                            'suffixoverlap_results' : suffixoverlap_results, \
                            'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())

    return mdf_train, mdf_test, column_dict_list

  def _process_DPnb(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPnb(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is numeric data with z-score normalization to mean 0 and sigma 1
    #adds data sampled from normal distribution with mean 0 and sigma 0.06 by default
    #where noise only injected to a subset of data based on flip_prob defaulting to 0.03
    #the noise properties may be customized with parameters 'mu', 'sigma', 'flip_prob'
    #note that the noise is only injected into the designated training data of df_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream applicaiton
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'mu' in params:
      mu = params['mu']
    else:
      mu = 0.0
      
    if 'sigma' in params:
      sigma = params['sigma']
    else:
      sigma = 0.06
      
    if 'flip_prob' in params:
      flip_prob = params['flip_prob']
    else:
      flip_prob = 0.03
      
    if 'noisedistribution' in params:
      noisedistribution = params['noisedistribution']
    else:
      #can pass as 'normal' or 'laplace'
      noisedistribution = 'normal'
      
    if 'testnoise' in params:
      testnoise = params['testnoise']
    else:
      testnoise = False
      
    DPnm_column = column + '_DPnb'
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, DPnm_column, suffixoverlap_results)
      
    #first we'll derive our sampled noise for injection
    if noisedistribution == 'normal':
      normal_samples = np.random.normal(loc=mu, scale=sigma, size=(mdf_train.shape[0]))
    elif noisedistribution == 'laplace':
      normal_samples = np.random.laplace(loc=mu, scale=sigma, size=(mdf_train.shape[0]))
      
    binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(mdf_train.shape[0]))
    
    mdf_train[DPnm_column] = pd.DataFrame(normal_samples) * pd.DataFrame(binomial_samples)
    
    #now inject noise
    mdf_train[DPnm_column] = mdf_train[DPnm_column] + mdf_train[column]
    
    #for test data is just pass-through unless testnoise parameter activated
    if testnoise is False:
      mdf_test[DPnm_column] = mdf_test[column].copy()
    
    elif testnoise is True:
      #first we'll derive our sampled noise for injection
      if noisedistribution == 'normal':
        normal_samples = np.random.normal(loc=mu, scale=sigma, size=(mdf_test.shape[0]))
      elif noisedistribution == 'laplace':
        normal_samples = np.random.laplace(loc=mu, scale=sigma, size=(mdf_test.shape[0]))

      binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(mdf_test.shape[0]))

      mdf_test[DPnm_column] = pd.DataFrame(normal_samples) * pd.DataFrame(binomial_samples)
      
      #now inject noise
      mdf_test[DPnm_column] = mdf_test[DPnm_column] + mdf_test[column]
    
    #create list of columns
    nmbrcolumns = [DPnm_column]

    nmbrnormalization_dict = {DPnm_column : {'mu' : mu, \
                                             'sigma' : sigma, \
                                             'flip_prob' : flip_prob, \
                                             'testnoise' : testnoise, \
                                             'noisedistribution' : noisedistribution}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPnb', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_DPmm(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPmm(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is numeric data min-max scaled within range 0-1
    #adds data sampled from normal distribution with mean 0 and sigma 0.03 by default
    #the noise properties may be customized with parameters 'mu', 'sigma'
    #also accepts parameter 'flip_prob' for ratio of data that will be adjusted (defaults to 1.)
    #noise is scaled based on the recieved points to keep within range 0-1
    #(e.g. for recieved data point 0.1, noise is scaled so as not to fall below -0.1)
    #gaussian noise source is also capped to maintain the range -0.5 to 0.5 (rare outlier points)
    #note that the noise is only injected into the designated training data of df_train
    #for test data this is a pass-through operation
    #note this assumes clean data as input since this will be intended for downstream applicaiton
    #in family trees, so no infill is performed
    #note that for postprocess function in postmunge, determination of whether to treat
    #df_test as train or test data is based on the traindata entry in postprocess_dict
    #in automunge df_test is treated as test data by default
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    if 'mu' in params:
      mu = params['mu']
    else:
      mu = 0.0
      
    if 'sigma' in params:
      sigma = params['sigma']
    else:
      sigma = 0.03
      
    if 'flip_prob' in params:
      flip_prob = params['flip_prob']
    else:
      flip_prob = 0.03
      
    if 'noisedistribution' in params:
      noisedistribution = params['noisedistribution']
    else:
      #can pass as 'normal' or 'laplace'
      noisedistribution = 'normal'
      
    if 'testnoise' in params:
      testnoise = params['testnoise']
    else:
      testnoise = False
      
    DPmm_column = column + '_DPmm'
    DPmm_column_temp1 = column + '_DPmm' + '_tmp1'
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, [DPmm_column, DPmm_column_temp1], suffixoverlap_results)
    
    def _injectmmnoise(df, DPmm_column, DPmm_column_temp1):
      #support function for noise injection
      
      #first we'll derive our sampled noise for injection
      if noisedistribution == 'normal':
        normal_samples = np.random.normal(loc=mu, scale=sigma, size=(df.shape[0]))
      elif noisedistribution == 'laplace':
        normal_samples = np.random.laplace(loc=mu, scale=sigma, size=(df.shape[0]))
      binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(df.shape[0]))

      df[DPmm_column] = pd.DataFrame(normal_samples) * pd.DataFrame(binomial_samples)

      #cap outliers
      df[DPmm_column] = np.where(df[DPmm_column] < -0.5, -0.5, df[DPmm_column])
      df[DPmm_column] = np.where(df[DPmm_column] > 0.5, 0.5, df[DPmm_column])

      #adjacent cell infill (this is included as a precaution shouldn't have any effect since upstream normalization)
      df[DPmm_column] = df[DPmm_column].fillna(method='ffill')
      df[DPmm_column] = df[DPmm_column].fillna(method='bfill')

      #support column to signal sign of noise, 0 is neg, 1 is pos
      df[DPmm_column_temp1] = 0
      df[DPmm_column_temp1] = np.where(df[DPmm_column] >= 0., 1, df[DPmm_column_temp1])

      #now inject noise, with scaled noise to maintain range 0-1
      #(so if mnmx value <0.5, and neg noise, we scale noise to maintain ratio as if minmax was 0.5, similarly for >0.5 mnmx)
      df[DPmm_column] = np.where(df[column] < 0.5, \
                                  df[column] + \
                                  (1 - df[DPmm_column_temp1]) * (df[DPmm_column] * df[column] / 0.5) + \
                                  (df[DPmm_column_temp1]) * (df[DPmm_column]), \
                                  df[DPmm_column])

      df[DPmm_column] = np.where(df[column] >= 0.5, \
                                  df[column] + \
                                  (1 - df[DPmm_column_temp1]) * (df[DPmm_column]) + \
                                  (df[DPmm_column_temp1]) * (df[DPmm_column] * (1 - df[column]) / 0.5), \
                                  df[DPmm_column])

      #remove support column
      del df[DPmm_column_temp1]
      
      return df
    
    mdf_train = _injectmmnoise(mdf_train, DPmm_column, DPmm_column_temp1)
    
    #for test data is just pass-through unless testnoise is activated
    if testnoise is False:
      mdf_test[DPmm_column] = mdf_test[column].copy()
    elif testnoise is True:
      mdf_test = _injectmmnoise(mdf_test, DPmm_column, DPmm_column_temp1)
    
    #create list of columns
    nmbrcolumns = [DPmm_column]

    nmbrnormalization_dict = {DPmm_column : {'mu' : mu, \
                                             'sigma' : sigma, \
                                             'flip_prob' : flip_prob, \
                                             'testnoise' : testnoise, \
                                             'noisedistribution' : noisedistribution}}

    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPmm', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_DPrt(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    """
    #process_DPrt 
    #function to scale data as follows:
    
    # if max >= 0 and min <= 0:
    #   #scaling based on 
    #   x = x / (max - min)

    # elif max >= 0 and min >= 0:
    #   #traditional min/max
    #   x = (x - min) / (max - min)
    
    # elif max <= 0 and min <= 0:
    #   #max/min (retains negative values)
    #   x = (x - max) / (max - min)
    
    #followed by a noise injection similar to DPmm based based on this set's retn range
    
    #replaces missing or improperly formatted data with mean of remaining values
    #(prior to noise injection)
    
    #returns same dataframes with new column of name column + '_DPrt'
    #note this is a "dualprocess" function since is applied to both dataframes
    
    #note with parameters divisor can also be set as standard deviation
    #also aprameters accepted for cap/floor/mulitplier/offset
    #where cap/floor based on pretransform values
    #multiplier/offset based on posttransform values, muoltiplier applied betfore offset
    """
    
    suffixoverlap_results = {}
    
    #accepts divisor parameters of 'minmax' or 'std', eg divisor for normalization equation
    #note that standard deviation doesn't have same properties for sign retention when all values > or < 0
    if 'divisor' in params:
      divisor = params['divisor']
    else:
      divisor = 'minmax'
    
    #offset is just an added constant applied after multiplier
    if 'offset' in params:
      offset = params['offset']
    else:
      offset = 0
      
    #multiplier scales the set by multiplication prior to offset
    if 'multiplier' in params:
      multiplier = params['multiplier']
    else:
      multiplier = 1
    
    #cap can be passed as True for max of training data or as a specific value prior to normalization, False for no cap
    if 'cap' in params:
      cap = params['cap']
    else:
      cap = False
      
    #floor can be passed as True for min of training data or as a specific value prior to normalization, False for no floor
    if 'floor' in params:
      floor = params['floor']
    else:
      floor = False
      
    #here are differential privacy parameters
    if 'mu' in params:
      mu = params['mu']
    else:
      mu = 0.0
      
    if 'sigma' in params:
      sigma = params['sigma']
    else:
      sigma = 0.03
      
    if 'flip_prob' in params:
      flip_prob = params['flip_prob']
    else:
      flip_prob = 0.03
      
    if 'noisedistribution' in params:
      noisedistribution = params['noisedistribution']
    else:
      #can pass as 'normal' or 'laplace'
      noisedistribution = 'normal'
      
    if 'testnoise' in params:
      testnoise = params['testnoise']
    else:
      testnoise = False
      
    DPrt_column = column + '_DPrt'
    DPrt_column_temp1 = column + '_DPrt' + '_tmp1'
    DPrt_column_temp2 = column + '_DPrt' + '_tmp2'
    
    newcolumns = [DPrt_column, DPrt_column_temp1, DPrt_column_temp2]
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
    
    #copy source column into new column
    mdf_train[DPrt_column] = mdf_train[column].copy()
    mdf_test[DPrt_column] = mdf_test[column].copy()

    #convert all values to either numeric or NaN
    mdf_train[DPrt_column] = pd.to_numeric(mdf_train[DPrt_column], errors='coerce')
    mdf_test[DPrt_column] = pd.to_numeric(mdf_test[DPrt_column], errors='coerce')
    
    #a few more metrics collected for driftreport
    #get standard deviation of training data
    std = mdf_train[DPrt_column].std()
    
    mad = mdf_train[DPrt_column].mad()
    
    #get maximum value of training column
    maximum = mdf_train[DPrt_column].max()
    
    #get minimum value of training column
    minimum = mdf_train[DPrt_column].min()
    
    #avoid outlier div by zero when max = min
    maxminusmin = maximum - minimum
    if maxminusmin == 0 or maxminusmin != maxminusmin:
      maxminusmin = 1
      
    if std != std or std == 0:
      std = 1
      
    if mad != mad or mad == 0:
      mad = 1
      
    #if cap < maximum, maximum = cap
    if cap is not False and cap is not True:
      if cap < maximum:
        maximum = cap
    if floor is not False and floor is not True:
      if floor > minimum:
        minimum = floor
        
    #cap and floor application
    if cap is True:
      cap = maximum
    if floor is True:
      floor = minimum
      
    if cap is not False:
      #replace values in test > cap with cap
      mdf_train.loc[mdf_train[DPrt_column] > cap, (DPrt_column)] \
      = cap
      
      mdf_test.loc[mdf_test[DPrt_column] > cap, (DPrt_column)] \
      = cap
    
    if floor is not False:
      #replace values in test < floor with floor
      mdf_train.loc[mdf_train[DPrt_column] < floor, (DPrt_column)] \
      = floor
      
      mdf_test.loc[mdf_test[DPrt_column] < floor, (DPrt_column)] \
      = floor
      
    #get mean of training data
    mean = mdf_train[DPrt_column].mean()
    if mean != mean:
      mean = 0
    
    #replace missing data with training set mean
    mdf_train[DPrt_column] = mdf_train[DPrt_column].fillna(mean)
    mdf_test[DPrt_column] = mdf_test[DPrt_column].fillna(mean)
    
    #edge case (only neccesary so scalingapproach is assigned)
    if maximum != maximum:
      maximum = 0
    if minimum != minimum:
      minimum = 0
      
    #divisor
    if divisor not in {'minmax', 'std', 'mad'}:
      print("Error: retn transform parameter 'divisor' only accepts entries of 'minmax' 'mad' or 'std'")
    if divisor == 'minmax':
      divisor = maxminusmin
    elif divisor == 'mad':
      divisor = mad
    else:
      divisor = std
      
    if divisor == 0 or divisor != divisor:
      divisor = 1
    
    #driftreport metric scalingapproach returned as 'retn' or 'mnmx' or 'mxmn'
    #where mnmx is for cases where all values in train set are positive
    #mxmn is for cases where all values in train set are negative
    
    if maximum >= 0 and minimum <= 0:
      
      mdf_train[DPrt_column] = (mdf_train[DPrt_column]) / \
                                    (divisor) * multiplier + offset
      
      mdf_test[DPrt_column] = (mdf_test[DPrt_column]) / \
                                    (divisor) * multiplier + offset
      
      scalingapproach = 'retn'
      
    elif maximum >= 0 and minimum >= 0:
    
      #perform min-max scaling to train and test sets using values from train
      mdf_train[DPrt_column] = (mdf_train[DPrt_column] - minimum) / \
                                    (divisor) * multiplier + offset

      mdf_test[DPrt_column] = (mdf_test[DPrt_column] - minimum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mnmx'
      
    elif maximum <= 0 and minimum <= 0:
    
      #perform min-max scaling to train and test sets using values from train
      mdf_train[DPrt_column] = (mdf_train[DPrt_column] - maximum) / \
                                    (divisor) * multiplier + offset

      mdf_test[DPrt_column] = (mdf_test[DPrt_column] - maximum) / \
                                   (divisor) * multiplier + offset
      
      scalingapproach = 'mxmn'
      
      
    #now apply noise injection
    
    def _injectrtnoise(df, DPrt_column, DPrt_column_temp1, DPrt_column_temp2):
      #support function for DPrt noise injection
    
      #first we'll derive our sampled noise for injection
      if noisedistribution == 'normal':
        normal_samples = np.random.normal(loc=mu, scale=sigma, size=(df.shape[0]))
      elif noisedistribution == 'laplace':
        normal_samples = np.random.laplace(loc=mu, scale=sigma, size=(df.shape[0]))

      binomial_samples = np.random.binomial(n=1, p=flip_prob, size=(df.shape[0]))

      df[DPrt_column_temp2] = pd.DataFrame(normal_samples) * pd.DataFrame(binomial_samples)

      #cap outliers
      df[DPrt_column_temp2] = np.where(df[DPrt_column_temp2] < -0.5, -0.5, df[DPrt_column_temp2])
      df[DPrt_column_temp2] = np.where(df[DPrt_column_temp2] > 0.5, 0.5, df[DPrt_column_temp2])

      #support column to signal sign of noise, 0 is neg, 1 is pos
      df[DPrt_column_temp1] = 0
      df[DPrt_column_temp1] = np.where(df[DPrt_column_temp2] >= 0., 1, df[DPrt_column_temp1])

      #for noise injection we'll first move data into range 0-1 and then revert after injection
      if scalingapproach == 'retn':
        df[DPrt_column] = (df[DPrt_column] - (minimum / divisor) ) / multiplier - offset
      elif scalingapproach == 'mnmx':
        df[DPrt_column] = (df[DPrt_column]) / multiplier - offset
      elif scalingapproach == 'mxmn':
        df[DPrt_column] = (df[DPrt_column] + (maximum - minimum) / divisor) / multiplier - offset


      #now inject noise, with scaled noise to maintain range 0-1
      #(so if mnmx value <0.5, and neg noise, we scale noise to maintain ratio as if minmax was 0.5, similarly for >0.5 mnmx)
      df[DPrt_column] = np.where(df[DPrt_column] < 0.5, \
                                  df[DPrt_column] + \
                                  (1 - df[DPrt_column_temp1]) * (df[DPrt_column_temp2] * df[DPrt_column] / 0.5) + \
                                  (df[DPrt_column_temp1]) * (df[DPrt_column_temp2]), \
                                  df[DPrt_column])

      df[DPrt_column] = np.where(df[DPrt_column] >= 0.5, \
                                  df[DPrt_column] + \
                                  (1 - df[DPrt_column_temp1]) * (df[DPrt_column_temp2]) + \
                                  (df[DPrt_column_temp1]) * (df[DPrt_column_temp2] * (1 - df[DPrt_column]) / 0.5), \
                                  df[DPrt_column])

      #remove support columns
      del df[DPrt_column_temp1]
      del df[DPrt_column_temp2]

      #for noise injection we'll first move data into range 0-1 and then revert after injection
      if scalingapproach == 'retn':
        df[DPrt_column] = (df[DPrt_column] + (minimum / divisor) ) * multiplier + offset
      elif scalingapproach == 'mnmx':
        df[DPrt_column] = (df[DPrt_column]) * multiplier + offset
      elif scalingapproach == 'mxmn':
        df[DPrt_column] = (df[DPrt_column] - (maximum - minimum) / divisor) * multiplier + offset
        
      return df
    
    mdf_train = _injectrtnoise(mdf_train, DPrt_column, DPrt_column_temp1, DPrt_column_temp2)
    
    #for test data is just pass-through
    #mdf_test[DPrt_column] = mdf_test[DPrt_column]
    if testnoise is True:
      mdf_test = _injectrtnoise(mdf_test, DPrt_column, DPrt_column_temp1, DPrt_column_temp2)
    
    #create list of columns
    nmbrcolumns = [DPrt_column]
    
    #store some values in the nmbr_dict{} for use later in ML infill methods
    column_dict_list = []
    
    nmbrnormalization_dict = {DPrt_column : {'mu' : mu, \
                                             'sigma' : sigma, \
                                             'flip_prob' : flip_prob, \
                                             'noisedistribution' : noisedistribution, \
                                             'minimum' : minimum, \
                                             'maximum' : maximum, \
                                             'mean' : mean, \
                                             'std' : std, \
                                             'mad' : mad, \
                                             'scalingapproach' : scalingapproach, \
                                             'offset' : offset, \
                                             'multiplier': multiplier, \
                                             'cap' : cap, \
                                             'floor' : floor, \
                                             'divisor' : divisor, \
                                             'testnoise' : testnoise, \
                                            }}
    
    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPrt', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_DPbn(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPbn(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is bnry encoded data (i.e. boolean integers in single column)
    #a flip mask is sampled from a Bernoulli distribution (binomial with n=1)
    #and entries where the mask is 1 are flipped by way of abs(entry - mask)
    #results are returned in a new column with header column + '_DPbn'
    #
    #accepts parameters:
    #'flip_prob' for probability of flipping an entry, defaults to 0.03
    #'testnoise' which when True also injects noise to mdf_test, defaults to False
    #for any other testnoise value mdf_test receives a pass-through copy of the input column
    #
    #the sampled mask is applied as a numpy array (i.e. by position)
    #instead of as a default-indexed DataFrame, since the latter is aligned by index labels
    #and so would return NaN for rows of a dataframe whose index is not 0..n-1
    #
    #note this assumes clean data as input since this will be intended for downstream application
    #in family trees, so no infill is performed
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    flip_prob = params.get('flip_prob', 0.03)
    testnoise = params.get('testnoise', False)
      
    DPbn_column = column + '_DPbn'
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, DPbn_column, suffixoverlap_results)
      
    #first we'll derive our sampled noise for injection, one draw per row
    train_flips = np.random.binomial(n=1, p=flip_prob, size=(mdf_train.shape[0]))
    
    #now inject noise, abs(x - 1) flips a boolean integer and abs(x - 0) retains it
    mdf_train[DPbn_column] = abs(mdf_train[column] - train_flips)
    
    #for test data is just pass-through unless testnoise is activated
    if testnoise is True:
      #test noise is sampled after train noise, same sequence of draws as prior
      test_flips = np.random.binomial(n=1, p=flip_prob, size=(mdf_test.shape[0]))

      mdf_test[DPbn_column] = abs(mdf_test[column] - test_flips)
    else:
      #also covers testnoise values other than True / False
      #so that the returned column is always present in mdf_test
      mdf_test[DPbn_column] = mdf_test[column].copy()
    
    #create list of columns
    nmbrcolumns = [DPbn_column]

    nmbrnormalization_dict = {DPbn_column : {'flip_prob' : flip_prob, \
                                             'testnoise' : testnoise}}

    #populate the column_dict data structure, one entry per returned column
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPbn', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_DPod(self, mdf_train, mdf_test, column, category, postprocess_dict, params = {}):
    '''
    #process_DPod(mdf_train, mdf_test, column, category, postprocess_dict, params = {})
    #function to inject noise to training data, such as for differential privacy purposes
    #assumes input is ordinal encoded data (i.e. categoric by integer in single column)
    #for each row a flip is activated per a Bernoulli draw with probability flip_prob
    #when flip activated the entry is replaced with a uniform random draw
    #from the set of unique encodings found in the train column
    #(including potentially the current encoding for no flip)
    #results are returned in a new column with header column + '_DPod'
    #
    #accepts parameters:
    #'flip_prob' for probability of activating a flip, defaults to 0.03
    #'testnoise' which when True also injects noise to mdf_test, defaults to False
    #for any other testnoise value mdf_test receives a pass-through copy of the input column
    #
    #the sampled noise is assigned to the support columns as numpy arrays (i.e. by position)
    #instead of as default-indexed DataFrames, since the latter are aligned by index labels
    #and so would return NaN for rows of a dataframe whose index is not 0..n-1
    #
    #note this assumes clean data as input since this will be intended for downstream application
    #in family trees, so no infill is performed
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    #initialize parameters
    flip_prob = params.get('flip_prob', 0.03)
    testnoise = params.get('testnoise', False)
      
    DPod_column = column + '_DPod'
    DPod_tempcolumn1 = column + '_DPod_tmp1'
    DPod_tempcolumn2 = column + '_DPod_tmp2'
    
    newcolumns = [DPod_column, DPod_tempcolumn1, DPod_tempcolumn2]
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(mdf_train, newcolumns, suffixoverlap_results)
    
    #we'll want to know the set of activations present in column, for automunge this is unique values
    #note that test noise is also sampled from the encodings found in the train data
    ord_encodings = mdf_train[column].unique()
    
    def _inject_ordinal_noise(df):
      #support function shared between train and test data
      #order of sampling is the flip mask followed by the replacement encodings
      
      #DPod_tempcolumn1 is the flip mask, DPod_tempcolumn2 the candidate replacement
      df[DPod_tempcolumn1] = np.random.binomial(n=1, p=flip_prob, size=(df.shape[0]))
      df[DPod_tempcolumn2] = np.random.choice(ord_encodings, size=(df.shape[0]))
      
      #this returns column value when DPod_tempcolumn1 is 0 or DPod_tempcolumn2 when DPod_tempcolumn1 is 1
      df[DPod_column] = \
      df[column] * (1 - df[DPod_tempcolumn1]) + df[DPod_tempcolumn1] * df[DPod_tempcolumn2]
      
      #remove support columns
      del df[DPod_tempcolumn1]
      del df[DPod_tempcolumn2]
      
      return df
    
    mdf_train = _inject_ordinal_noise(mdf_train)
    
    #for test data is just pass-through unless testnoise is activated
    if testnoise is True:
      mdf_test = _inject_ordinal_noise(mdf_test)
    else:
      #also covers testnoise values other than True / False
      #so that the returned column is always present in mdf_test
      mdf_test[DPod_column] = mdf_test[column].copy()

    #create list of columns
    nmbrcolumns = [DPod_column]

    nmbrnormalization_dict = {DPod_column : {'flip_prob' : flip_prob, \
                                             'testnoise' : testnoise}}

    #populate the column_dict data structure, one entry per returned column
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'DPod', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return mdf_train, mdf_test, column_dict_list

  def _process_qbt1(self, df, column, category, postprocess_dict, params = {}):
    """
    #translates numerical entries to Q notation
    #which is a kind of fixed point binary encoding
    #with a configurable count of registers for integers and for fractionals
    #
    #accepts parameters:
    #'inplace' (default False), when True the source column is renamed instead of copied
    #'suffix' (default 'qbt1'), suffix appender, applied as column + '_' + suffix
    #'integer_bits' (default 3), 'fractional_bits' (default 12)
    #'sign_bit' (default True), accepted as True or False
    #
    #with the defaults encodings are returned in a set of 16 columns
    #headers are suffix + '_sign' for the sign bit if included
    #suffix + '_2^#' for integer registers and suffix + '_2^-#' for fractional registers
    #and columns are in order of sign, integers 2^(integer_bits-1)..2^0, fractionals 2^-1..2^-fractional_bits
    #
    #non-numeric entries and missing data are subject to infill with 0
    #entries beyond the largest representable magnitude (overflow) are capped at that value
    #and when there is no sign bit negative entries are floored at 0
    #
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}
    
    #initialize parameters
    inplace = params.get('inplace', False)
    suffix = params.get('suffix', 'qbt1')
    integer_bits = params.get('integer_bits', 3)
    fractional_bits = params.get('fractional_bits', 12)
    #sign_bit accepted as True or False
    sign_bit = params.get('sign_bit', True)
      
    #support column holds the residual value through the encoding and is deleted at the end
    support_column = column + '_' + suffix
    
    if inplace is True:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, support_column, suffixoverlap_results)
      
      df.rename(columns = {column : support_column}, inplace = True)
    
    else:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, support_column, suffixoverlap_results)
    
    #convert all values to either numeric or NaN
    df[support_column] = pd.to_numeric(df[support_column], errors='coerce')
    
    #grab a few drift stats, these are measured prior to infill and capping
    minimum = df[support_column].min()
    maximum = df[support_column].max()
    mean = df[support_column].mean()
    stdev = df[support_column].std()
    
    #default infill is 0, kind of arbitrary, there's no perfect solution
    #recommend supplementing with NArw if a marker needed
    df[support_column] = df[support_column].fillna(0)
    
    #overflow is the largest magnitude representable, i.e. the value with all registers activated
    overflow = 0
    for power in range(integer_bits):
      overflow += 2**power
    for power in range(1, fractional_bits + 1):
      overflow += 2**(-power)
      
    #entries above overflow are capped at overflow
    df[support_column] = np.where(df[support_column] > overflow, overflow, df[support_column])
    
    #entries below the lower bound are capped at the lower bound
    #which is -overflow with a sign bit or otherwise 0
    if sign_bit is True:
      lower_bound = -overflow
    else:
      lower_bound = 0
    df[support_column] = np.where(df[support_column] < lower_bound, lower_bound, df[support_column])
      
    #list of sign columns
    sign_columns = [support_column + '_sign'] if sign_bit is True else []
      
    #list of integer columns, in order of largest to smallest power
    integer_powers = list(range(integer_bits-1, -1, -1))
    integer_columns = [support_column + '_2^' + str(power) for power in integer_powers]
      
    #list of fractional columns, in order of 2^-1 to 2^-fractional_bits
    fractional_columns = \
    [support_column + '_2^-' + str(power) for power in range(1, fractional_bits + 1)]
      
    allcolumns = sign_columns + integer_columns + fractional_columns
    
    suffixoverlap_results = \
    self._df_check_suffixoverlap(df, allcolumns, suffixoverlap_results)
      
    #populate sign column, note that 0 is positive, 1 is negative
    if sign_bit is True:
      sign_column = sign_columns[0]
      
      df[sign_column] = np.where(df[support_column] < 0, 1, 0).astype(np.int8)

      #now that sign bit is populated we'll take absolute of support column
      df[support_column] = df[support_column].abs()
      
    #populate integer columns
    for power, bit_column in zip(integer_powers, integer_columns):
      
      place_value = 2**power
      
      #the int8 conversion truncates the ratio, which will be 0 or 1 after the overflow capping
      df[bit_column] = (df[support_column] / place_value).astype(np.int8)

      #subtract the encoded portion so the residual is carried to the next register
      df[support_column] -= df[bit_column] * place_value
      
    #now populate the fractional columns from the fractional residual
    df[support_column] = df[support_column] % 1

    for power, bit_column in enumerate(fractional_columns, start=1):

      place_value = 2**(-power)

      df[bit_column] = (df[support_column] / place_value).astype(np.int8)

      df[support_column] -= df[bit_column] * place_value
    
    #delete support column
    del df[support_column]
    
    #populate the column_dict data structure, one entry per returned column
    #each with its own normalization_dict instance
    column_dict_list = []
    
    for ac in allcolumns:

      normalization_dict = {ac : {'sign_columns' : sign_columns, \
                                  'integer_columns' : integer_columns, \
                                  'fractional_columns' : fractional_columns, \
                                  'suffix' : suffix, \
                                  'integer_bits' : integer_bits, \
                                  'fractional_bits' : fractional_bits, \
                                  'sign_bit' : sign_bit, \
                                  'minimum' : minimum, \
                                  'maximum' : maximum, \
                                  'mean' : mean, \
                                  'stdev' : stdev, \
                                  'overflow' : overflow}}
      
      column_dict_list.append({ac : {'category' : 'qbt1', \
                                     'origcategory' : category, \
                                     'normalization_dict' : normalization_dict, \
                                     'origcolumn' : column, \
                                     'inputcolumn' : column, \
                                     'columnslist' : allcolumns, \
                                     'categorylist' : allcolumns, \
                                     'infillmodel' : False, \
                                     'infillcomplete' : False, \
                                     'suffixoverlap_results' : suffixoverlap_results, \
                                     'deletecolumn' : False}})
      
    return df, column_dict_list
  
  def _process_null(self, df, column, category, postprocess_dict, params = {}):
    '''
    #this is placeholder for columns given a null deletion operation
    #(such as is default for training sets containing all nan values)
    #(deletion takes place in circle of life function)
    #the returned empty column_dict_list is consistent with convention for other transforms
    #for scenarios where no columns are returned
    '''
    
    suffixoverlap_results = {}
    
    # df = df.drop([column], axis=1)
    #deletion takes place in circle of life function

    column_dict_list = []

    # column_dict = {column : {'category' : 'null', \
    #                                   'origcategory' : category, \
    #                                   'normalization_dict' : {column + '_null':{}}, \
    #                                   'origcolumn' : column, \
    #                                   'inputcolumn' : column, \
    #                                   'columnslist' : [], \
    #                                   'categorylist' : [], \
    #                                   'infillmodel' : False, \
    #                                   'infillcomplete' : False, \
    #                                   'suffixoverlap_results' : suffixoverlap_results, \
    #                                   'deletecolumn' : False}}
    
    # #now append column_dict onto postprocess_dict
    # column_dict_list.append(column_dict.copy())

    return df, column_dict_list
  
  def _process_copy(self, df, column, category, postprocess_dict, params = {}):
    '''
    #copy function, returns a copy of the source column under a new header
    #accepts parameter 'suffix' for suffix appender, which defaults to '_copy'
    #note that the suffix is appended to the column header just as passed
    #(i.e. without insertion of an underscore)
    #useful if want to apply same function more than once with different parameters
    #
    #returns df, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    copy_column = column + params.get('suffix', '_copy')
    
    df, suffixoverlap_results = \
    self._df_copy_train(df, column, copy_column, suffixoverlap_results)

    #single returned column so a single column_dict entry
    copy_properties = {'category' : 'copy', \
                       'origcategory' : category, \
                       'normalization_dict' : {copy_column:{}}, \
                       'origcolumn' : column, \
                       'inputcolumn' : column, \
                       'columnslist' : [copy_column], \
                       'categorylist' : [copy_column], \
                       'infillmodel' : False, \
                       'infillcomplete' : False, \
                       'suffixoverlap_results' : suffixoverlap_results, \
                       'deletecolumn' : False}
    
    column_dict_list = [{copy_column : copy_properties}]

    return df, column_dict_list

  def _process_excl(self, df, column, category, postprocess_dict, params = {}):
    """
    #addresses columns that returned a 'excl' category
    #note this is a singleprocess transform
    #the column contents are maintained as received, just with suffix '_excl' added to the header
    #
    #accepts parameter 'inplace' (default False)
    #when True the source column is renamed, otherwise it is copied to the new header
    #
    #per the original notes:
    #the excl transform is a very special exception, and this suffix is later
    #removed when automunge(.) parameter excl_suffix passed as False
    #excl may only be applied as a supplement primitive in a family tree (eg cousins)
    #as it replaces the source column internally (by a simple rename)
    #and the function check_transformdict(.) works 'under the hood'
    #to translate user passed excl transforms in family trees
    #from replacement primitives to corresponding supplement primitives
    #
    #returns df, column_dict_list
    """
    
    suffixoverlap_results = {}

    inplace = params.get('inplace', False)
    
    exclcolumn = column + '_excl'

    if inplace is True:
    
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, exclcolumn, suffixoverlap_results)
      
      df.rename(columns = {column : exclcolumn}, inplace = True)

    else:

      df, suffixoverlap_results = \
      self._df_copy_train(df, column, exclcolumn, suffixoverlap_results)

    #note the returned column is not moved to the end of the dataframe
    #this was decided against to maximize efficiency
    #as there are some workflow scenarios with a lot of excl columns in postmunge
    
    #single returned column so a single column_dict entry
    excl_properties = {'category' : 'excl', \
                       'origcategory' : category, \
                       'normalization_dict' : {exclcolumn:{}}, \
                       'origcolumn' : column, \
                       'inputcolumn' : column, \
                       'columnslist' : [exclcolumn], \
                       'categorylist' : [exclcolumn], \
                       'infillmodel' : False, \
                       'infillcomplete' : False, \
                       'suffixoverlap_results' : suffixoverlap_results, \
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : excl_properties}]

    return df, column_dict_list

  def _process_exc2(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #pass-through of a numeric column with suffix '_exc2' added to the header
    #entries are forced to numeric (non-numeric entries converted to NaN)
    #and missing data receives infill with the mode of the train column
    #when the train column has no mode the mean is used instead
    #and when that is also NaN (such as for no numeric entries) infill is 0
    #the same fill value derived from train data is applied to test data
    #
    #accepts parameter 'inplace' (default False)
    #when True the source column is renamed, otherwise it is copied to the new header
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    exclcolumn = column + '_exc2'
    
    if inplace is True:

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, exclcolumn, suffixoverlap_results)

      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : exclcolumn}, inplace = True)

    else:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, exclcolumn, suffixoverlap_results)

      mdf_test[exclcolumn] = mdf_test[column].copy()
    
    #force to numeric, entries that can't be converted become NaN
    mdf_train[exclcolumn] = pd.to_numeric(mdf_train[exclcolumn], errors='coerce')
    mdf_test[exclcolumn] = pd.to_numeric(mdf_test[exclcolumn], errors='coerce')
    
    #fill value is the first mode of train data when available, else the mean
    train_modes = mdf_train[exclcolumn].mode()
    if len(train_modes) >= 1:
      fillvalue = train_modes[0]
    else:
      fillvalue = mdf_train[exclcolumn].mean()
      
    #special case if column didn't have any numeric entries
    #(NaN is the only value not equal to itself)
    if fillvalue != fillvalue:
      fillvalue = 0
    
    #replace missing data with fill value
    mdf_train[exclcolumn] = mdf_train[exclcolumn].fillna(fillvalue)
    mdf_test[exclcolumn] = mdf_test[exclcolumn].fillna(fillvalue)
    
    #single returned column so a single column_dict entry
    exc2_properties = {'category' : 'exc2', \
                       'origcategory' : category, \
                       'normalization_dict' : {exclcolumn : {'fillvalue' : fillvalue}}, \
                       'origcolumn' : column, \
                       'inputcolumn' : column, \
                       'columnslist' : [exclcolumn], \
                       'categorylist' : [exclcolumn], \
                       'infillmodel' : False, \
                       'infillcomplete' : False, \
                       'suffixoverlap_results' : suffixoverlap_results, \
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : exc2_properties}]

    return mdf_train, mdf_test, column_dict_list
  
  def _process_exc5(self, mdf_train, mdf_test, column, category, \
                         postprocess_dict, params = {}):
    '''
    #pass-through of an integer column with suffix '_exc5' added to the header
    #entries are forced to numeric (non-numeric entries converted to NaN)
    #and entries that are not equal to their rounded value (non integers) are also converted to NaN
    #missing data then receives infill with the mode of the train column
    #when the train column has no mode the mean is used instead
    #and when that is also NaN (such as for no valid entries) infill is 0
    #the same fill value derived from train data is applied to test data
    #
    #accepts parameter 'inplace' (default False)
    #when True the source column is renamed, otherwise it is copied to the new header
    #
    #returns mdf_train, mdf_test, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    exclcolumn = column + '_exc5'
    
    if inplace is True:

      suffixoverlap_results = \
      self._df_check_suffixoverlap(mdf_train, exclcolumn, suffixoverlap_results)

      for df in (mdf_train, mdf_test):
        df.rename(columns = {column : exclcolumn}, inplace = True)

    else:

      #copy source column into new column
      mdf_train, suffixoverlap_results = \
      self._df_copy_train(mdf_train, column, exclcolumn, suffixoverlap_results)

      mdf_test[exclcolumn] = mdf_test[column].copy()
    
    #force to numeric, entries that can't be converted become NaN
    mdf_train[exclcolumn] = pd.to_numeric(mdf_train[exclcolumn], errors='coerce')
    mdf_test[exclcolumn] = pd.to_numeric(mdf_test[exclcolumn], errors='coerce')
    
    #non integers are subject to infill
    train_is_integer = mdf_train[exclcolumn] == mdf_train[exclcolumn].round()
    mdf_train[exclcolumn] = np.where(train_is_integer, mdf_train[exclcolumn], np.nan)
    
    test_is_integer = mdf_test[exclcolumn] == mdf_test[exclcolumn].round()
    mdf_test[exclcolumn] = np.where(test_is_integer, mdf_test[exclcolumn], np.nan)

    #fill value is the first mode of train data when available, else the mean
    train_modes = mdf_train[exclcolumn].mode()
    if len(train_modes) >= 1:
      fillvalue = train_modes[0]
    else:
      fillvalue = mdf_train[exclcolumn].mean()
      
    #special case if column didn't have any numeric entries
    #(NaN is the only value not equal to itself)
    if fillvalue != fillvalue:
      fillvalue = 0
    
    #replace missing data with fill value
    mdf_train[exclcolumn] = mdf_train[exclcolumn].fillna(fillvalue)
    mdf_test[exclcolumn] = mdf_test[exclcolumn].fillna(fillvalue)
    
    #single returned column so a single column_dict entry
    exc5_properties = {'category' : 'exc5', \
                       'origcategory' : category, \
                       'normalization_dict' : {exclcolumn : {'fillvalue' : fillvalue}}, \
                       'origcolumn' : column, \
                       'inputcolumn' : column, \
                       'columnslist' : [exclcolumn], \
                       'categorylist' : [exclcolumn], \
                       'infillmodel' : False, \
                       'infillcomplete' : False, \
                       'suffixoverlap_results' : suffixoverlap_results, \
                       'deletecolumn' : False}
    
    column_dict_list = [{exclcolumn : exc5_properties}]

    return mdf_train, mdf_test, column_dict_list
  
  def _process_shfl(self, df, column, category, postprocess_dict, params = {}):
    '''
    #function to shuffle data in a column
    #returned in a column with suffix '_shfl' added to the header
    #non-numeric entries allowed
    #the shuffle is performed with the support function _df_shuffle_series
    #which is passed the random seed accessed from postprocess_dict['randomseed']
    #for missing values, uses adjacent cell infill as default
    #(forward fill followed by backward fill, applied after the shuffle)
    #
    #accepts parameter 'inplace' (default False)
    #when True the source column is renamed, otherwise it is copied to the new header
    #
    #note the infill makes use of Series.ffill() / Series.bfill()
    #which are equivalent to the fillna(method=...) form that pandas has deprecated
    #
    #returns df, column_dict_list
    '''
    
    suffixoverlap_results = {}
    
    inplace = params.get('inplace', False)
    
    shfl_column = column + '_shfl'
    
    if inplace is not True:
      
      #copy source column into new column
      df, suffixoverlap_results = \
      self._df_copy_train(df, column, shfl_column, suffixoverlap_results)
    
    else:
      
      suffixoverlap_results = \
      self._df_check_suffixoverlap(df, shfl_column, suffixoverlap_results)
      
      df.rename(columns = {column : shfl_column}, inplace = True)
    
    #we've introduced that randomseed is now accessible throughout in the postprocess_dict
    #(named so as not to shadow the random module imported by the file)
    randomseed = postprocess_dict['randomseed']
    
    #uses support function
    df = self._df_shuffle_series(df, shfl_column, randomseed)
    
    #we'll do the adjacent cell infill after the shuffle operation
    
    #apply ffill to replace NArows with value from adjacent cell in preceding row
    df[shfl_column] = df[shfl_column].ffill()
    
    #we'll follow with a bfill just in case first row had a nan
    df[shfl_column] = df[shfl_column].bfill()
    
    #create list of columns
    nmbrcolumns = [shfl_column]

    nmbrnormalization_dict = {shfl_column : {}}

    #populate the column_dict data structure, one entry per returned column
    column_dict_list = []

    for nc in nmbrcolumns:

      column_dict = { nc : {'category' : 'shfl', \
                           'origcategory' : category, \
                           'normalization_dict' : nmbrnormalization_dict, \
                           'origcolumn' : column, \
                           'inputcolumn' : column, \
                           'columnslist' : nmbrcolumns, \
                           'categorylist' : nmbrcolumns, \
                           'infillmodel' : False, \
                           'infillcomplete' : False, \
                           'suffixoverlap_results' : suffixoverlap_results, \
                           'deletecolumn' : False}}

      column_dict_list.append(column_dict.copy())
        
    return df, column_dict_list

  def _evalcategory(self, df_source, column, randomseed, eval_ratio, \
                   numbercategoryheuristic, powertransform, labels = False):
    '''
    #evalcategory(df, column)
    #Function that dakes as input a dataframe and associated column id \
    #evaluates the contents of cells and classifies the column into one of four categories
    #category 1, 'bnry', is for columns with only two categorys of text or integer
    #category 2, 'nmbr', is for columns with ndumerical integer or float values
    #category 3: 'bxcx', is for nmbr category with all positive values
    #category 4, 'text', is for columns with multiple categories appropriate for one-hot
    #category 5, 'date', is for columns with Timestamp data
    #category 6, 'null', is for columns with all nan values in training set
    #returns category id as a string
    '''
    
    #we'll introduce convention of special values for powertransform to change default
    #we'll allow powertransform == 'excl' to signal that nonassigned columns should
    #be left untouched (a simpler version of existing functionality of assigning excl in assigncat)
    if powertransform == 'excl':
      category = 'excl'
      
    #or powertransform == 'exc2' for unprocessed but subject to force to numeric and modeinfill
    elif powertransform == 'exc2':
      category = 'exc2'
    
    #powertransform == 'infill' is for cases where data is already numerically encoded and just want infill
    elif powertransform == 'infill':
      rowcount = df_source.shape[0]

      #we'll have convention that eval_ratio only applied for sets with >2,000 rows
      if rowcount < 2000:
        eval_ratio = 1.
      else:
        if eval_ratio > 0 and eval_ratio <= 1:
          eval_ratio = eval_ratio
        else:
          eval_ratio = eval_ratio / rowcount
      
      #take a random sample of rows for evaluation based on eval_ratio heuristic
      df = pd.DataFrame(df_source[column]).sample(frac=eval_ratio, random_state=randomseed)

      floatcount = np.where(pd.to_numeric(df[column], errors='coerce') != pd.to_numeric(df[column], errors='coerce').round(), 1, 0).sum()
      floatset = np.where(pd.to_numeric(df[column], errors='coerce') != pd.to_numeric(df[column], errors='coerce').round(), 0, 1)
      stringset = np.where(df[column].astype(str) == df[column], 1, 0)
      floatstringcount = ((floatset == 1) & (stringset == 1)).sum()
      nancount = np.where(df[column] != df[column], 1, 0).sum()
      integercount = np.where(pd.to_numeric(df[column], errors='coerce') == pd.to_numeric(df[column], errors='coerce').round(), 1, 0).sum()
      integerset = np.where(pd.to_numeric(df[column], errors='coerce') == pd.to_numeric(df[column], errors='coerce').round(), 1, 0)
      # stringset = np.where(df[column].astype(str) == df[column], 1, 0)
      integerstringcount = ((integerset == 1) & (stringset == 1)).sum()
      actualfloatcount = floatcount - floatstringcount - nancount
      actualintegercount = integercount - integerstringcount
      uniquecount = df[column].nunique()
      rowcount = df[column].shape[0]
      actualfloatratio = actualfloatcount / rowcount
      actualintegerratio = actualintegercount / rowcount
      uniqueratio = uniquecount / rowcount

      #null if no numeric entries
      if actualfloatratio == 0 and actualintegerratio == 0:
        category = 'null'

      #exc2 for numeric types
      elif actualfloatratio > 0:
        category = 'exc2'

      #exc8 for integer type with unique ratio > 0.75
      elif (actualfloatratio == 0 and actualintegerratio > 0 and uniqueratio > 0.75):
        category = 'exc8'

      #exc5 for integers
      else:
        category = 'exc5'

    else:
      
      #_____
      #a few default categories
      
      #default categorical
      #defaultcategorical = 'text'
      defaultcategorical = '1010'
      
      #defaultordinal = 'ord3'
      #defaultordinal applied when unique values exceeds numbercategoryheuristic
      defaultordinal = 'hsh2'

      defaultordinal_allunique = 'hash'

      defaultbnry = 'bnry'
      
      defaultnumerical = 'nmbr'
      
      defaultdatetime = 'dat6'

      defaultnull = 'null'
      
      #_____

      rowcount = df_source.shape[0]

      #we'll have convention that eval_ratio only applied for sets with >2,000 rows
      if rowcount < 2000:
        eval_ratio = 1.
      else:
        if eval_ratio > 0 and eval_ratio <= 1:
          eval_ratio = eval_ratio
        else:
          eval_ratio = eval_ratio / rowcount
      
      #take a random sample of rows for evaluation based on eval_ratio heuristic
      df = pd.DataFrame(df_source[column]).sample(frac=eval_ratio, random_state=randomseed)

      #I couldn't find a good pandas tool for evaluating data class, \
      #So will produce an array containing data types of each cell and \
      #evaluate for most common variable using the collections library

      type1_df = df[column].transform(lambda x: type(x)).to_numpy()
      
      #c = collections.Counter(type1_df)
      c = Counter(type1_df)
      mc2 = c.most_common(2)
      mc = [mc2[0]]
      
      #count number of unique values
      nunique = df[column].nunique()

      #check if nan present for cases where nunique == 3
      nanpresent = False
      if nunique == 3:
        for unique in df[column].unique():
          if unique != unique:
            nanpresent = True

      #free memory (dtypes are memory hogs)
      del type1_df

      #additional array needed to check for time series

      type2_df = df[column].transform(lambda x: type(pd.to_datetime(x, errors = 'coerce', utc=True))).to_numpy()

      #datec = collections.Counter(type2_df)
      datec = Counter(type2_df)
      datemc = datec.most_common(1)
      datemc2 = datec.most_common(2)

      #this is to address scenario where only one value so we can still call mc2[1][0]
      if len(datemc2) == len(datemc):
        datemc2 = datemc + datemc

      #free memory (dtypes are memory hogs)
      del type2_df

      #an extension of this approach could be for those columns that produce a text\
      #category to implement an additional text to determine the number of \
      #common groupings / or the amount of uniquity. For example if every row has\
      #a unique value then one-hot-encoding would not be appropriate. It would \
      #probably be apopropraite to either return an error message if this is found \
      #or alternatively find a furhter way to automate this processing such as \
      #look for contextual clues to groupings that can be inferred.

      #This is kind of hack to evaluate class by comparing these with output of mc
      checkint = 1
      checkfloat = 1.1
      checkstring = 'string'
      checkNAN = np.nan

      #there's probably easier way to do this, here will create a check for date
      df_checkdate = pd.DataFrame([{'checkdate' : '7/4/2018'}])
      df_checkdate['checkdate'] = pd.to_datetime(df_checkdate['checkdate'], errors = 'coerce')

      #create dummy variable to store determined class (default is text class)
      category = defaultcategorical

      #if most common in column is string and > two values, set category to text
      if isinstance(checkstring, mc[0][0]) and nunique > 2:
        category = defaultcategorical

      #if most common is date, set category to date
      if isinstance(df_checkdate['checkdate'][0], datemc[0][0]):
        category = defaultdatetime

      if df[column].dtype.name == 'category':
        if nunique <= 2:
          category = defaultbnry
        else:
          category = defaultcategorical

      #if most common in column is integer and > two values, set category to number of bxcx
      if isinstance(checkint, mc[0][0]) and nunique > 2:

        if df[column].dtype.name == 'category':
          if nunique <= 2:
            category = defaultbnry
          else:
            category = defaultcategorical

        #take account for numbercategoryheuristic
        #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
        #if nunique < numbercategoryheuristic:
        if nunique <= 3:
          if nunique == 3:
            #category = 'text'
            category = defaultcategorical
          else:
            category = defaultbnry
  #       if True is False:
  #         pass

        else:
          category = defaultnumerical

      #if most common in column is float, set category to number or bxcx
      if isinstance(checkfloat, mc[0][0]):

        #take account for numbercategoryheuristic
        #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic \
  #       if nunique < numbercategoryheuristic \
  #       or df[column].dtype.name == 'category':
  #       if df[column].dtype.name == 'category':
        if df[column].dtype.name == 'category':
          if nunique <= 2:
            category = defaultbnry
          else:
            category = defaultcategorical

        elif nunique <= 3:
          if nunique == 3:
            category = defaultcategorical
          elif nunique <= 2:
            category = defaultbnry

        else:
          category = defaultnumerical

      #if most common in column is integer and <= two values, set category to binary
      if isinstance(checkint, mc[0][0]) and nunique <= 2:
        category = defaultbnry

      #if most common in column is string and <= two values, set category to binary
      if isinstance(checkstring, mc[0][0]) and nunique <= 2:
        category = defaultbnry

      #else if most common in column is NaN, re-evaluate using the second most common type
      #(I suspect the below might be impacted if there are three dtypes instead of two,
      #in which case the 50% ratio rule may not be valid, that is kind of remote edge case)
      #elif df[column].isna().sum() >= df.shape[0] / 2:
      if len(mc2) > 1:
      
        if df[column].isna().sum() >= df.shape[0] / 2:


          #if 2nd most common in column is string and two values, set category to binary
          if isinstance(checkstring, mc2[1][0]) and nunique == 2:
            category = defaultbnry

          #if 2nd most common in column is string and > two values, set category to text
          if isinstance(checkstring, mc2[1][0]) and nunique > 2:
            category = defaultcategorical

          #if 2nd most common is date, set category to date   
          if isinstance(df_checkdate['checkdate'][0], datemc2[1][0]):
            category = defaultdatetime

          #if 2nd most common in column is integer and > two values, set category to number
          if isinstance(checkint, mc2[1][0]) and nunique > 2:

            if df[column].dtype.name == 'category':
              if nunique <= 2:
                category = defaultbnry
              else:
                category = defaultcategorical

    #         #take account for numbercategoryheuristic
    #         #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
            if nunique <= 3:

              if nunique == 3:
                category = defaultcategorical
              else:
                category = defaultbnry

    #         if True is False:
    #           pass

            else:

              category = defaultnumerical

          #if 2nd most common in column is float, set category to number
          if isinstance(checkfloat, mc2[1][0]):

    #         #take account for numbercategoryheuristic
    #         #if df[column].nunique() / df[column].shape[0] < numbercategoryheuristic:
    #         if df[column].nunique() < numbercategoryheuristic:

    #           category = 'text'

    #         else:

            if df[column].dtype.name == 'category':
              if nunique <= 2:
                category = defaultbnry
              else:
                category = defaultcategorical

            if df[column].nunique() <= 3:

              if nunique == 3:
                #category = 'text'
                category = defaultcategorical
              else:
                category = defaultbnry

            else:

              category = defaultnumerical

          #if 2nd most common in column is integer and <= two values, set category to binary
          if isinstance(checkint, mc2[1][0]) and nunique <= 2:
            category = defaultbnry

          #if 2nd most common in column is string and <= two values, set category to binary
          if isinstance(checkstring, mc2[1][0]) and nunique <= 2:
            category = defaultbnry

      if df[column].isna().sum() == df.shape[0]:
        category = defaultnull

      #if category == 'text':
      if category == defaultcategorical:
        if nunique > numbercategoryheuristic:
          category = defaultordinal
        #here 0.75 is a heuristic, might be worth some fine tuning of this threshold
        if nunique > df.shape[0] * 0.75:
          category = defaultordinal_allunique

      #new statistical tests for numerical sets from v2.25
      #I don't consider mytself an expert here, these are kind of a placeholder while I conduct more research

  #     #default to 'nmbr' category instead of 'bxcx'
  #     if category == 'bxcx' and powertransform is False:
  #       category = 'nmbr'

      if category in {'nmbr', 'bxcx', defaultnumerical} \
      and powertransform is True:
        
        if df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float).nunique() >= 3:

          #shapiro tests for normality, we'll use a common threshold p<0.05 to reject the normality hypothesis
          stat, p = shapiro(df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float))
          #a typical threshold to test for normality is >0.05, let's try a lower bar for this application
          if p > 0.025:
            category = 'nmbr'
          if p <= 0.025:
            #skewness helps recognize exponential distributions, reference wikipedia
            #reference from wikipedia
    #       A normal distribution and any other symmetric distribution with finite third moment has a skewness of 0
    #       A half-normal distribution has a skewness just below 1
    #       An exponential distribution has a skewness of 2
    #       A lognormal distribution can have a skewness of any positive value, depending on its parameters
            #skewness = skew(df[column])
            skewness = skew(df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float))
            if skewness < 1.5:
              category = 'mnmx'
            else:
              #if powertransform is True:
              if category in {'nmbr', 'bxcx'}:

                #note we'll only allow bxcx category if all values greater than a clip value
                #>0 (currently set at 0.1) since there is an asymptote for box-cox at 0
                if (df[pd.to_numeric(df[column], errors='coerce').notnull()][column].astype(float) >= 0.1).all():
                  category = 'bxcx'

                else:
                  category = 'nmbr'

              else:
                category = 'MAD3'

      del df
      
      #special cases for evlauation of labels column
      if labels is True:

        #(defaultnumerical = 'nmbr')
        if category == defaultnumerical:
          category = 'lbnm'
          
        #(defaultcategorical = '1010')
        if category == defaultcategorical:
          #category = 'lb10'
          category = 'lbor'
          
        #(defaultordinal = 'ord3')
        if category == defaultordinal:
          category = 'lbor'

        #(defaultordinal_allunique = 'ord5')
        if category == defaultordinal_allunique:
          category = 'lbor'
          
        if category == 'text':
          category = 'lbor'
          
        if category == 'bnry':
          category = 'lbor'
          
        #(defaultdatetime = 'dat6')
        if category == defaultdatetime:
          category = 'lbda'
    
    return category

  def _getNArows(self, df2, column, category, postprocess_dict, \
                drift_dict = {}, driftassess = False):
    '''
    #NArows(df, column), function that when fed a dataframe, \
    #column id, and category label outputs a single column dataframe composed of \
    #True and False with the same number of rows as the input and the True's \
    #coresponding to those rows of the input that had missing or NaN data. This \
    #output can later be used to identify which rows for a column to infill with ML\
    # derived plug data
    
    #also accepts a dictionary to store results of a drfit assessment available
    #by passing driftassess = True
    #if drift assessment performed returns an updated dictionary withj results
    
    #by default all NArowtypes recognize np.inf as NaN
    #(option activated external to this function)
    '''
    
    NArowtype = postprocess_dict['process_dict'][category]['NArowtype']
    
    #originally these evaluations were performed on a copy of the received column
    #struck that approach to reduce memory overhead from copy operation
    #small tradeoff in that edits here (such as cast to numeric) 
    #are preserved outside of this function
    # df2 = pd.DataFrame(df[column].copy())
    
    #if category == 'text':
    if NArowtype in {'justNaN'}:
      
      if driftassess is True:
        
        nunique = df2[column].nunique()
        
        #this is to ensure postprocess_dict file size doesn't get out of control so 
        #only collect unique entries in source column drift stats
        #if number of unique is below a threshold (arbrily set to 500)
        if nunique < 500:

          drift_dict.update({column : {'unique' : df2[column].unique(), \
                                       'nunique' : nunique, \
                                       'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
          
        else:
          
          drift_dict.update({column : {'nunique' : nunique, \
                                       'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
          
      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})

#     if category == 'bnry':

#       #returns dataframe of True and False, where True coresponds to the NaN's
#       #renames column name to column + '_NArows'
#       NArows = pd.isna(df2[column])
#       NArows = pd.DataFrame(NArows)
#       NArows = NArows.rename(columns = {column:column+'_NArows'})

    if NArowtype in {'numeric'}:

      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      
      if driftassess is True:
        
        if df2[column].notnull().nunique() > 2:
          W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
          skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        else:
          W = np.nan
          p = np.nan
          skew_stat = np.nan
          
#         W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
#         skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        
        drift_dict.update({column : {'max' : df2[column].max(), \
                                     'quantile_99' : df2[column].quantile(0.99), \
                                     'quantile_90' : df2[column].quantile(0.90), \
                                     'quantile_66' : df2[column].quantile(0.66), \
                                     'median' : df2[column].median(), \
                                     'quantile_33' : df2[column].quantile(0.33), \
                                     'quantile_10' : df2[column].quantile(0.10), \
                                     'quantile_01' : df2[column].quantile(0.01), \
                                     'min' : df2[column].min(), \
                                     'mean' : df2[column].mean(), \
                                     'std' : df2[column].std(), \
                                     'MAD' : df2[column].mad(), \
                                     'skew' : skew_stat, \
                                     'shapiro_W' : W, \
                                     'shapiro_p' : p, \
                                     'nan_ratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})

      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
    if NArowtype in {'integer'}:

      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      
      #non integers are subject to infill
      df2[column] = np.where(df2[column] == df2[column].round(), df2[column], np.nan)
      
      if driftassess is True:
        
        if df2[column].notnull().nunique() > 2:
          W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
          skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        else:
          W = np.nan
          p = np.nan
          skew_stat = np.nan
          
#         W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
#         skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        
        drift_dict.update({column : {'max' : df2[column].max(), \
                                     'quantile_99' : df2[column].quantile(0.99), \
                                     'quantile_90' : df2[column].quantile(0.90), \
                                     'quantile_66' : df2[column].quantile(0.66), \
                                     'median' : df2[column].median(), \
                                     'quantile_33' : df2[column].quantile(0.33), \
                                     'quantile_10' : df2[column].quantile(0.10), \
                                     'quantile_01' : df2[column].quantile(0.01), \
                                     'min' : df2[column].min(), \
                                     'mean' : df2[column].mean(), \
                                     'std' : df2[column].std(), \
                                     'MAD' : df2[column].mad(), \
                                     'skew' : skew_stat, \
                                     'shapiro_W' : W, \
                                     'shapiro_p' : p, \
                                     'nan_ratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})

      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
    if NArowtype in {'positivenumeric'}:
      
      #this is so don't edit source column when reset values <= 0
      df2 = pd.DataFrame(df2[column]).copy()
      
      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      nonpositive_ratio = df2[df2[column] <= 0].sum()[0] / df2[column].shape[0]
      
      df2.loc[df2[column] <= 0, (column)] = np.nan
      
      if driftassess is True:
        
        if df2[column].notnull().nunique() > 2:
          W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
          skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        else:
          W = np.nan
          p = np.nan
          skew_stat = np.nan
        
        drift_dict.update({column : {'max' : df2[column].max(), \
                                     'quantile_99' : df2[column].quantile(0.99), \
                                     'quantile_90' : df2[column].quantile(0.90), \
                                     'quantile_66' : df2[column].quantile(0.66), \
                                     'median' : df2[column].median(), \
                                     'quantile_33' : df2[column].quantile(0.33), \
                                     'quantile_10' : df2[column].quantile(0.10), \
                                     'quantile_01' : df2[column].quantile(0.01), \
                                     'min' : df2[column].min(), \
                                     'mean' : df2[column].mean(), \
                                     'std' : df2[column].std(), \
                                     'MAD' : df2[column].mad(), \
                                     'skew' : skew_stat, \
                                     'shapiro_W' : W, \
                                     'shapiro_p' : p, \
                                     'nonpositive_ratio' : nonpositive_ratio, \
                                     'nan_ratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
    
      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
    if NArowtype in {'nonnegativenumeric'}:
      
      #this is so don't edit source column when reset values < 0
      df2 = pd.DataFrame(df2[column]).copy()

      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      negative_ratio = df2[df2[column] < 0].sum()[0] / df2[column].shape[0]
      
      df2.loc[df2[column] < 0, (column)] = np.nan
      
      if driftassess is True:
        
        if df2[column].notnull().nunique() > 2:
          W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
          skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        else:
          W = np.nan
          p = np.nan
          skew_stat = np.nan
        
        drift_dict.update({column : {'max' : df2[column].max(), \
                                     'quantile_99' : df2[column].quantile(0.99), \
                                     'quantile_90' : df2[column].quantile(0.90), \
                                     'quantile_66' : df2[column].quantile(0.66), \
                                     'median' : df2[column].median(), \
                                     'quantile_33' : df2[column].quantile(0.33), \
                                     'quantile_10' : df2[column].quantile(0.10), \
                                     'quantile_01' : df2[column].quantile(0.01), \
                                     'min' : df2[column].min(), \
                                     'mean' : df2[column].mean(), \
                                     'std' : df2[column].std(), \
                                     'MAD' : df2[column].mad(), \
                                     'skew' : skew_stat, \
                                     'shapiro_W' : W, \
                                     'shapiro_p' : p, \
                                     'negative_ratio' : negative_ratio, \
                                     'nan_ratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})

      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
    if NArowtype in {'nonzeronumeric'}:

      #this is so don't edit source column when reset values == 0
      df2 = pd.DataFrame(df2[column]).copy()

      #convert all values to either numeric or NaN
      df2[column] = pd.to_numeric(df2[column], errors='coerce')
      zero_ratio = df2[df2[column] == 0].sum()[0] / df2[column].shape[0]
      
      df2.loc[df2[column] == 0, (column)] = np.nan
      
      if driftassess is True:
        
        if df2[column].notnull().nunique() > 2:
          W, p = shapiro(df2[df2[column].notnull()][column].astype(float))
          skew_stat = skew(df2[df2[column].notnull()][column].astype(float))
        else:
          W = np.nan
          p = np.nan
          skew_stat = np.nan
        
        drift_dict.update({column : {'max' : df2[column].max(), \
                                     'quantile_99' : df2[column].quantile(0.99), \
                                     'quantile_90' : df2[column].quantile(0.90), \
                                     'quantile_66' : df2[column].quantile(0.66), \
                                     'median' : df2[column].median(), \
                                     'quantile_33' : df2[column].quantile(0.33), \
                                     'quantile_10' : df2[column].quantile(0.10), \
                                     'quantile_01' : df2[column].quantile(0.01), \
                                     'min' : df2[column].min(), \
                                     'mean' : df2[column].mean(), \
                                     'std' : df2[column].std(), \
                                     'MAD' : df2[column].mad(), \
                                     'skew' : skew_stat, \
                                     'shapiro_W' : W, \
                                     'shapiro_p' : p, \
                                     'zero_ratio' : zero_ratio, \
                                     'nan_ratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})

      #returns dataframe of True and False, where True coresponds to the NaN's
      #renames column name to column + '_NArows'
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
    if NArowtype in {'parsenumeric'}:
      
      NArows = self._parsenumeric(df2, column)

      drift_dict.update({column : {'nunique' : df2[column].nunique(), \
                                   'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
      
    if NArowtype in {'datetime'}:
      
      df2[column] = pd.to_datetime(df2[column], errors = 'coerce', utc=True)

      if driftassess is True:
        drift_dict.update({column : {'nanratio' : pd.isna(df2[column]).sum() / df2[column].shape[0]}})
      
      NArows = pd.isna(df2[column])
      NArows = pd.DataFrame(NArows)
      NArows = NArows.rename(columns = {column:column+'_NArows'})
      
#       NArows = self._parsedate(df2, column)
      
    if NArowtype in {'exclude', 'boolexclude', 'ordlexclude', 'totalexclude'}:
      
      if driftassess is True:
        drift_dict.update({column : {}})
      
#       NArows = pd.DataFrame(np.zeros((df2.shape[0], 1)), columns=[column+'_NArows'])
      #NArows = NArows.rename(columns = {column:column+'_NArows'})
      
      NArows = pd.DataFrame(df2[column].copy())
      NArows[column] = False
      NArows = NArows.rename(columns = {column:column+'_NArows'})
#       NArows[column+'_NArows'] = False
      
    del df2
    
    if driftassess is False:
      
      return NArows
    
    else:
      
      return NArows, drift_dict
  
  def _parsedate(self, df, column):
    """
    #support function for NArows
    #parses datetime entries and returns a column with boolean identification
    #for entries that aren't registering as datetime objects
    #wherein activations are 0 if a datetime is present and 1 if not
    """
    
    df[column] = pd.to_datetime(df[column], errors = 'coerce')

    NArows = pd.isna(df[column])
    NArows = pd.DataFrame(NArows)
    NArows = NArows.rename(columns = {column:column+'_NArows'})
    
    return NArows
  
  def _is_number(self, s):
    """
    #support function for numeric parsing
    #tests if a string s is numeric
    #partly inspired by stack overflow discussion 
    #https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float
    """
    try:
      s = float(s)
    except ValueError:
      return False
    if s == s and not np.isinf(s):
      return True
    else:
      #(nan will be subject to infill)
      return False

  def _is_number_comma(self, s):
    """
    #support function for numeric parsing
    #tests if a string s is numeric after stripping out commas
    #partly inspired by stack overflow discussion 
    #https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float
    """
    try:
      #strips out commas
      s = float(s.replace(',',''))
    except ValueError:
      return False
    if s == s and not np.isinf(s):
      return True
    else:
      #(nan will be subject to infill)
      return False
    
  def _is_number_EU(self, s):
    """
    #support function for numeric parsing
    #tests if a string s is numeric after stripping out periods
    #and replacing commas with periods
    #such as to convert from international conventions to US
    #this is relavent for cases where working with international data on US OS
    #I expect if working on international OS a different convention may be appropriate
    #based on what is recognized as a float
    """
    try:
      #strips out spaces, periods other than first and last character, replaces commas with periods, cast as float
      s = float(s[0] + s[1:-1].replace(' ','').replace('.','').replace(',','.') + s[-1])
    except ValueError:
      return False
    if s == s and not np.isinf(s):
      return True
    else:
      #(nan will be subject to infill)
      return False
    
  def _parsenumeric(self, df, column):
    """
    #support function for process_nmrc and NArows
    #parses string entries and returns a column with boolean identification
    #for entries that include numeric string portions
    #wherein activations are 0 if a number is present and 1 if not
    #treats numeric entries as number as well
    """
    
    #first we find overlaps from mdf_train
    
    unique_list = list(df[column].astype(str).unique())

    unique_list = list(map(str, unique_list))
    
    maxlength = max(len(x) for x in unique_list)
    
    overlap_lengths = list(range(maxlength, 0, -1))

    overlap_dict = {}
    
    for overlap_length in overlap_lengths:

      for unique in unique_list:
        
        if unique not in overlap_dict:

          len_unique = len(unique)

          if len_unique >= overlap_length:
            
            if overlap_length > 1:

              nbr_iterations = len_unique - overlap_length

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self._is_number(extract):
        
                    overlap_dict.update({unique : False})
                
            #else if overlap_length == 1    
            else:
              
              nbr_iterations = len_unique - overlap_length
              
              in_dict = False

              for i in range(nbr_iterations + 1):
                
                if unique not in overlap_dict:

                  extract = unique[i:(overlap_length+i)]

  #                 extract_already_in_overlap_dict = False

                  if self._is_number(extract):

                    in_dict = True

                    overlap_dict.update({unique : False})
                  
              if in_dict is False:

                overlap_dict.update({unique : True})
    
    NArows = pd.DataFrame(df[column].copy())

    NArows[column] = NArows[column].astype(str)
    NArows[column] = NArows[column].replace(overlap_dict)
#     df[column] = df[column].astype(np.int8)
    
    NArows.columns = [column+'_NArows']
    
    return NArows

  def _populateMLinfilldefaults(self, randomseed):
    '''
    populates a dictionary with default values for ML infill,
    currently based on Random Forest Regressor and Random Forest Classifier 
    (Each based on ScikitLearn default values)
  
    note that n_estimators set at 100 (default for version 0.22)
    '''
  
    MLinfilldefaults = {'RandomForestClassifier':{}, 'RandomForestRegressor':{}}
    
    MLinfilldefaults['RandomForestClassifier'].update({'n_estimators':100, \
                                                       'criterion':'gini', \
                                                       'max_depth':None, \
                                                       'min_samples_split':2, \
                                                       'min_samples_leaf':1, \
                                                       'min_weight_fraction_leaf':0.0, \
                                                       'max_features':'auto', \
                                                       'max_leaf_nodes':None, \
                                                       'min_impurity_decrease':0.0, \
                                                       'min_impurity_split':None, \
                                                       'bootstrap':True, \
                                                       'oob_score':False, \
                                                       'n_jobs':None, \
                                                       'random_state':randomseed, \
                                                       'verbose':0, \
                                                       'warm_start':False, \
                                                       'class_weight':None})
  
    MLinfilldefaults['RandomForestRegressor'].update({'n_estimators':100, \
                                                      'criterion':'mse', \
                                                      'max_depth':None, \
                                                      'min_samples_split':2, \
                                                      'min_samples_leaf':1, \
                                                      'min_weight_fraction_leaf':0.0, \
                                                      'max_features':'auto', \
                                                      'max_leaf_nodes':None, \
                                                      'min_impurity_decrease':0.0, \
                                                      'min_impurity_split':None, \
                                                      'bootstrap':True, \
                                                      'oob_score':False, \
                                                      'n_jobs':None, \
                                                      'random_state':randomseed, \
                                                      'verbose':0, \
                                                      'warm_start':False})

    return MLinfilldefaults

  def _initRandomForestClassifier(self, ML_cmnd, MLinfilldefaults):
    '''
    initializes a RandomForestClassifier model

    each supported parameter is taken from 
    ML_cmnd['MLinfill_cmnd']['RandomForestClassifier'] when present there
    and otherwise from MLinfilldefaults['RandomForestClassifier']

    note that the received ML_cmnd is populated with empty 'MLinfill_cmnd' and
    'RandomForestClassifier' entries when they are not already present

    entries passed in ML_cmnd for parameters other than those listed below
    are not passed to the model

    returns the initialized model
    '''
    
    #populate ML_cmnd if stuff not already present
    user_params = \
    ML_cmnd.setdefault('MLinfill_cmnd', {}).setdefault('RandomForestClassifier', {})

    supported_params = ('n_estimators', 'criterion', 'max_depth', \
                        'min_samples_split', 'min_samples_leaf', \
                        'min_weight_fraction_leaf', 'max_features', \
                        'max_leaf_nodes', 'min_impurity_decrease', \
                        'min_impurity_split', 'bootstrap', 'oob_score', \
                        'n_jobs', 'random_state', 'verbose', 'warm_start', \
                        'class_weight')

    model_params = {}

    for param in supported_params:

      #user passed value takes precedence, defaults only inspected when needed
      if param in user_params:
        model_params[param] = user_params[param]
      else:
        model_params[param] = MLinfilldefaults['RandomForestClassifier'][param]

    #then initialize RandomForestClassifier model
    model = RandomForestClassifier(**model_params)

    return model

  def _initRandomForestRegressor(self, ML_cmnd, MLinfilldefaults):
    '''
    initializes a RandomForestRegressor model

    each supported parameter is taken from 
    ML_cmnd['MLinfill_cmnd']['RandomForestRegressor'] when present there
    and otherwise from MLinfilldefaults['RandomForestRegressor']

    note that the received ML_cmnd is populated with empty 'MLinfill_cmnd' and
    'RandomForestRegressor' entries when they are not already present

    entries passed in ML_cmnd for parameters other than those listed below
    are not passed to the model

    returns the initialized model
    '''
    
    #populate ML_cmnd if stuff not already present
    if 'MLinfill_cmnd' not in ML_cmnd:
      ML_cmnd.update({'MLinfill_cmnd':{}})
    if 'RandomForestRegressor' not in ML_cmnd['MLinfill_cmnd']:
      ML_cmnd['MLinfill_cmnd'].update({'RandomForestRegressor':{}})

    user_params = ML_cmnd['MLinfill_cmnd']['RandomForestRegressor']

    supported_params = ('n_estimators', 'criterion', 'max_depth', \
                        'min_samples_split', 'min_samples_leaf', \
                        'min_weight_fraction_leaf', 'max_features', \
                        'max_leaf_nodes', 'min_impurity_decrease', \
                        'min_impurity_split', 'bootstrap', 'oob_score', \
                        'n_jobs', 'random_state', 'verbose', 'warm_start')

    model_params = {}

    for param in supported_params:

      #user passed value takes precedence, defaults only inspected when needed
      #every default is read from the RandomForestRegressor entry
      #(n_jobs default was previously read from the RandomForestClassifier entry)
      if param in user_params:
        model_params[param] = user_params[param]
      else:
        model_params[param] = MLinfilldefaults['RandomForestRegressor'][param]

    #then initialize RandomForestRegressor model 
    model = RandomForestRegressor(**model_params)

    return model
  
  def _inspect_ML_cmnd(self, ML_cmnd, autoML_type, MLinfill_alg):
    """
    #Inspects ML_cmnd to determine if any of the parameters passed
    #for regressor or classifier are passed as lists instead of distinct
    #values, in which case they will be evaluated via grid search
    #or in a future extension random search or other hyperparameter tuning methods
    
    #takes as input user-passed ML_cmnd, returns tune_marker
    #where tune_marker = True indicates sets were passed, else False
    
    #MLinfill_type refers to type of predcitive algorithm applied,
    #currently only support for scikit Random Forest via 'default'
    #intent is to build in additional options
    
    #MLinfill_alg refers to the target algorithm for passed parameters
    #currently supports 'RandomForestRegressor' & 'RandomForestClassifier'
    """
    
    #initialize tune_marker to default
    tune_marker = False
    
    if autoML_type == 'randomforest':
    
      if 'MLinfill_cmnd' in ML_cmnd:

        if MLinfill_alg in ML_cmnd['MLinfill_cmnd']:

          for key in ML_cmnd['MLinfill_cmnd'][MLinfill_alg]:

            #if passed parameter is a set
            if type(ML_cmnd['MLinfill_cmnd'][MLinfill_alg][key]) \
            in {type([1]), type(range(1)), type(stats.expon(1))}:

              tune_marker = True
    
    return tune_marker
  
  def _assemble_param_sets(self, ML_cmnd, autoML_type, MLinfill_alg):
    """
    #assembles ML_cmnd passed parameters into two sets
    #for hyoeroparameter tuning operation
    
    #those parameters that were passed as sets 
    #will be saved in tune_params dictionary
    
    #those parameters that were otherwise passed
    #will be saved in static_params dictionary
    
    #returns those two dictionaries tune_params & static_params
    
    #MLinfill_type refers to type of predcitive algorithm applied,
    #currently only support for scikit Random Forest via 'default'
    #intent is to build in additional options
    
    #MLinfill_alg refers to the target algorithm for passed parameters
    #currently supports 'RandomForestRegressor' & 'RandomForestClassifier'
    """
    
    #initialize returned dictionaries
    static_params = {}
    tune_params = {}
    
    if autoML_type == 'randomforest':
      
      if 'MLinfill_cmnd' in ML_cmnd:
        
        if MLinfill_alg in ML_cmnd['MLinfill_cmnd']:
          
          for key in ML_cmnd['MLinfill_cmnd'][MLinfill_alg]:
            
            #if passed parameter is a set
            if type(ML_cmnd['MLinfill_cmnd'][MLinfill_alg][key]) \
            in {type([1]), type(range(1)), type(stats.expon(1))}:
              
              #add set to tune_params which will be targeted for grid search
              tune_params.update({key : ML_cmnd['MLinfill_cmnd'][MLinfill_alg][key]})
              
            else:
              
              #else add to static_params which will overwrite defaults
              static_params.update({key : ML_cmnd['MLinfill_cmnd'][MLinfill_alg][key]})
        
    return static_params, tune_params

  def _predictinfill(self, category, df_train_filltrain, df_train_filllabel, \
                    df_train_fillfeatures, df_test_fillfeatures, randomseed, \
                    postprocess_dict, ML_cmnd, autoMLer, printstatus, categorylist = []):
    '''
    #predictinfill(category, df_train_filltrain, df_train_filllabel, \
    #df_train_fillfeatures, df_test_fillfeatures, randomseed, categorylist), \
    #function that takes as input \
    #a category string, the output of createMLinfillsets(.), a seed for randomness \
    #and a list of columns produced by a text class preprocessor when applicable and 
    #returns predicted infills for the train and test feature sets as df_traininfill, \
    #df_testinfill based on derivations using scikit-learn, with the lenth of \
    #infill consistent with the number of True values from NArows, and the trained \
    #model
    #accepts autoMLer populated with architecture options which is applied based on entries to ML_cmnd
    '''

    #if autoML_type not specified than we'll apply default (randomforest)
    #note this is only a temporary update to ML_cmnd and is not returned from function call
    if 'autoML_type' not in ML_cmnd:
      ML_cmnd.update({'autoML_type' : 'randomforest'})
    
    #grab autoML_type from ML_cmnd, this will be one of our keys for autoMLer dictionary
    autoML_type = ML_cmnd['autoML_type']
  
    #MLinfilltype distinguishes between classifier/regressor, single/multi column, ordinal/onehot/binary, etc
    #see potential values documented in assembleprocessdict function
    MLinfilltype = postprocess_dict['process_dict'][category]['MLinfilltype']
    
    #if a numeric target set
    if MLinfilltype in {'numeric', 'concurrent_nmbr', 'integer'}:
      
      #edge case if training data has zero rows (such as if column was all NaN) 
      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False
      
      else:
        
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'regression'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus, postprocess_dict)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          df_traininfill = np.array([0])

        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          df_testinfill = np.array([0])

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = ['infill'])
      df_testinfill = pd.DataFrame(df_testinfill, columns = ['infill'])

      # #this might be useful for tlbn, leaving out or now since don't want to clutter mlinfilltypes
      # #as concurrent_nmbr is intended as a resource for more than just tlbn
      # if MLinfilltype == 'concurrent_nmbr':
      #   df_traininfill['infill'] = np.where(df_traininfill['infill'] < 0, -1, df_traininfill['infill'])
      #   df_testinfill['infill'] = np.where(df_testinfill['infill'] < 0, -1, df_testinfill['infill'])

      if MLinfilltype == 'integer':

        df_traininfill = df_traininfill.round()
        df_testinfill = df_testinfill.round()
      
    #if target is categoric, such as ordinal or boolean integers
    if MLinfilltype in {'singlct', 'binary', 'concurrent_act'}:
      
      #edge case if training data has zero rows (such as if column was all NaN) 
      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:
        
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        if MLinfilltype == 'singlct':
          ML_application = 'ordinalclassification'
        else:
          ML_application = 'booleanclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus, postprocess_dict)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          df_traininfill = np.array([0])

        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          df_testinfill = np.array([0])

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = ['infill'])
      df_testinfill = pd.DataFrame(df_testinfill, columns = ['infill'])
      
    #if target is multi column categoric (onehot encoded) / (binary encoded handled seperately)
    if MLinfilltype in {'multirt'}:

      if df_train_filltrain.shape[0] == 0:
        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:
        
        #future extension - Label Smoothing for ML infill
        #(might incorporate this into the training function to be activated by ML_cmnd)
          
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'onehotclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus, postprocess_dict)
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, categorylist)
        else:
          #this needs to have same number of columns as text category
          df_traininfill = np.zeros(shape=(1,len(categorylist)))
        
        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, categorylist)
        else:
          #this needs to have same number of columns as text category
          df_testinfill = np.zeros(shape=(1,len(categorylist)))
          
      #convert infill values to dataframe (this column labeleling also works for single column case)
      df_traininfill = pd.DataFrame(df_traininfill, columns = categorylist)
      df_testinfill = pd.DataFrame(df_testinfill, columns = categorylist)
    
    #if target is a binary encoded categoric set
    if MLinfilltype in {'1010'}:
      
      if df_train_filltrain.shape[0] == 0:

        df_traininfill = np.zeros(shape=(1,len(categorylist)))
        df_testinfill = np.zeros(shape=(1,len(categorylist)))

        model = False

      else:

        #convert from binary to one-hot encoding
        df_train_filllabel = \
        self._convert_1010_to_onehot(df_train_filllabel)
          
        #now call our training function
        #which handles tuning if applicable, model initialization, and training
        
        #ML_application is another key to access the function, distinguishes between classification and regression
        ML_application = 'onehotclassification'
        
        model = \
        autoMLer[autoML_type][ML_application]['train'](ML_cmnd, df_train_filltrain, df_train_filllabel, randomseed, printstatus, postprocess_dict)

        #this is to support 1010 infill predictions in postmunge
        for entry in categorylist:
          postprocess_dict['column_dict'][entry].update({'_1010_categorylist_proxy_for_postmunge_MLinfill' : list(range(df_train_filllabel.shape[1]))})
        
        #only run following if we have any train rows needing infill
        if df_train_fillfeatures.shape[0] > 0:
          df_traininfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_train_fillfeatures, printstatus, list(range(df_train_filllabel.shape[1])))

          df_traininfill = \
          self._convert_onehot_to_1010(df_traininfill)

        else:
          #this needs to have same number of columns as text category
          df_traininfill = np.zeros(shape=(1,len(categorylist)))
        
        #only run following if we have any test rows needing infill
        if df_test_fillfeatures.shape[0] > 0:
          df_testinfill = autoMLer[autoML_type][ML_application]['predict'](ML_cmnd, model, df_test_fillfeatures, printstatus, list(range(df_train_filllabel.shape[1])))

          df_testinfill = \
          self._convert_onehot_to_1010(df_testinfill)

        else:
          #this needs to have same number of columns as text category
          df_testinfill = np.zeros(shape=(1,len(categorylist)))

      #convert infill values to dataframe
      df_traininfill = pd.DataFrame(df_traininfill, columns = categorylist)
      df_testinfill = pd.DataFrame(df_testinfill, columns = categorylist)
      
    #if target category excluded from ML infill:
    if MLinfilltype in {'exclude', 'boolexclude', 'ordlexclude', 'totalexclude'}:

      #create empty sets for now
      #an extension of this method would be to implement a comparable infill \
      #method for the time category, based on the columns output from the \
      #preprocessing
      df_traininfill = pd.DataFrame({'infill' : [0]}) 
      df_testinfill = pd.DataFrame({'infill' : [0]}) 

      model = False
    
    return df_traininfill, df_testinfill, model, postprocess_dict

  def _createMLinfillsets(self, df_train, df_test, column, trainNArows, testNArows, \
                         category, randomseed, postprocess_dict, columnslist = [], \
                         categorylist = []):
    '''
    #update createMLinfillsets as follows:
    #instead of differentiation by category, do a test for whether categorylist = []
    #if so do a single column transform excluding those other columns from columnslist
    #in the sets comparable to , otherwise do a transform comparable to text category
    #createMLinfillsets(df_train, df_test, column, trainNArows, testNArows, \
    #category, columnslist = []) function that when fed dataframes of train and\
    #test sets, column id, df of True/False corresponding to rows from original \
    #sets with missing values, a string category of 'text', 'date', 'nmbr', or \
    #'bnry', and a list of column id's for the text category if applicable. The \
    #function returns a series of dataframes which can be applied to training a \
    #machine learning model to predict appropriate infill values for those points \
    #that had missing values from the original sets, including returns of \
    #df_train_filltrain, df_train_filllabel, df_train_fillfeatures, \
    #and df_test_fillfeatures
    '''
    
    MLinfilltype = postprocess_dict['process_dict'][category]['MLinfilltype']
    
    #create 3 new dataframes for each train column - the train and labels \
    #for rows not needing infill, and the features for rows needing infill \
    #also create a test features column 
    
    if MLinfilltype in {'numeric', 'singlct', 'integer', 'binary', \
                        'multirt', '1010', \
                        'concurrent_act', 'concurrent_nmbr'}:

      #if this is a single column set or concurrent_act
      if len(categorylist) == 1 or \
      postprocess_dict['process_dict'][category]['MLinfilltype'] in {'concurrent_act', 'concurrent_nmbr'}:

        #first concatinate the NArows True/False designations to df_train & df_test
        df_train = pd.concat([df_train, trainNArows], axis=1)
        df_test = pd.concat([df_test, testNArows], axis=1)

        #create copy of df_train to serve as training set for fill
        df_train_filltrain = df_train.copy()
        #now delete rows coresponding to True
        df_train_filltrain = df_train_filltrain[df_train_filltrain[trainNArows.columns[0]] == False]

        #now delete columns = columnslist and the NA labels (orig column+'_NArows') from this df
        df_train_filltrain = df_train_filltrain.drop(columnslist, axis=1)
        df_train_filltrain = df_train_filltrain.drop([trainNArows.columns[0]], axis=1)

        #create a copy of df_train[column] for fill train labels
        df_train_filllabel = pd.DataFrame(df_train[column].copy())
        #concatinate with the NArows
        df_train_filllabel = pd.concat([df_train_filllabel, trainNArows], axis=1)
        #drop rows corresponding to True
        df_train_filllabel = df_train_filllabel[df_train_filllabel[trainNArows.columns[0]] == False]

        #delete the NArows column
        df_train_filllabel = df_train_filllabel.drop([trainNArows.columns[0]], axis=1)

        #create features df_train for rows needing infill
        #create copy of df_train (note it already has NArows included)
        df_train_fillfeatures = df_train.copy()
        #delete rows coresponding to False
        df_train_fillfeatures = df_train_fillfeatures[(df_train_fillfeatures[trainNArows.columns[0]])]
        #delete columnslist and column+'_NArows'
        df_train_fillfeatures = df_train_fillfeatures.drop(columnslist, axis=1)
        df_train_fillfeatures = df_train_fillfeatures.drop([trainNArows.columns[0]], axis=1)

        #create features df_test for rows needing infill
        #create copy of df_test (note it already has NArows included)
        df_test_fillfeatures = df_test.copy()
        #delete rows coresponding to False
        df_test_fillfeatures = df_test_fillfeatures[(df_test_fillfeatures[testNArows.columns[0]])]
        #delete column and column+'_NArows'
        df_test_fillfeatures = df_test_fillfeatures.drop(columnslist, axis=1)
        df_test_fillfeatures = df_test_fillfeatures.drop([testNArows.columns[0]], axis=1)

        #delete NArows from df_train, df_test
        df_train = df_train.drop([trainNArows.columns[0]], axis=1)
        df_test = df_test.drop([testNArows.columns[0]], axis=1)

      #else if categorylist wasn't single entry
      else:

        #create a list of columns representing columnslist exlucding elements from
        #categorylist
        noncategorylist = columnslist[:]
        #this removes categorylist elements from noncategorylist
        noncategorylist = list(set(noncategorylist).difference(set(categorylist)))

        #first concatinate the NArows True/False designations to df_train & df_test
        df_train = pd.concat([df_train, trainNArows], axis=1)
        df_test = pd.concat([df_test, testNArows], axis=1)

        #create copy of df_train to serve as training set for fill
        df_train_filltrain = df_train.copy()
        #now delete rows coresponding to True
        df_train_filltrain = df_train_filltrain[df_train_filltrain[trainNArows.columns[0]] == False]

        #now delete columns = columnslist and the NA labels (orig column+'_NArows') from this df
        df_train_filltrain = df_train_filltrain.drop(columnslist, axis=1)
        df_train_filltrain = df_train_filltrain.drop([trainNArows.columns[0]], axis=1)

        #create a copy of df_train[categorylist] for fill train labels
        df_train_filllabel = df_train[categorylist].copy()
        #concatinate with the NArows
        df_train_filllabel = pd.concat([df_train_filllabel, trainNArows], axis=1)
        #drop rows corresponding to True
        df_train_filllabel = df_train_filllabel[df_train_filllabel[trainNArows.columns[0]] == False]

        #delete the NArows column
        df_train_filllabel = df_train_filllabel.drop([trainNArows.columns[0]], axis=1)

        #create features df_train for rows needing infill
        #create copy of df_train (note it already has NArows included)
        df_train_fillfeatures = df_train.copy()
 